
### Sending a Request Using the Requests Module

In [1]:
# sending request and logging the response code

import requests

# HTTPS keeps the request encrypted; the timeout stops a stalled connection
# from hanging the cell indefinitely.
res = requests.get(url='https://api.github.com/users/digvijaychouhan', timeout=10)
print(res)
print(type(res))

<Response [200]>
<class 'requests.models.Response'>


In [None]:
# accessing the requested content from URL
# (`res` is the response object returned by the earlier requests.get call)
data = res.content
print(data)

In [None]:
# parse the JSON response body into a Python dictionary and list every field
data = res.json()
for key, value in data.items():
    print(f"Key: {key} \t Value: {value}")

In [None]:
# accessing data directly: look up a single field of the parsed response by its key
print(data['name'])

In [None]:
# passing additional parameters in URL (the query string follows the '?')
# HTTPS keeps the request encrypted; the timeout stops a stalled connection
# from hanging the cell indefinitely.
response = requests.get('https://api.github.com/search/repositories?q=language:python', timeout=10)
repo_data= response.json()
print(repo_data['total_count'])

In [None]:
# passing parameters as separate argument
# A dict lets requests build and URL-encode the query string itself; the
# timeout stops a stalled connection from hanging the cell indefinitely.
response = requests.get(
    url='https://api.github.com/search/repositories',
    params={'q': 'language:javascript'},
    timeout=10,
)
repo_data= response.json()
print(repo_data['total_count']) # outputting specific data

## Pandas
Pandas is a flexible data-analysis library written in Python, with its performance-critical parts implemented in Cython and C. It makes working with tabular data easier in Python. Conceptually, Pandas is a mix of Excel and SQL.

In [None]:
# importing the pandas library and naming per standard convention i.e. 'pd'
import pandas as pd
import random

random.seed(3)   # fixed seed so the generated ages are the same on every run

names = ['Jai', 'Akash', 'Amit', 'Sumit', 'Billy', 'Dev', 'Kumar']
# one random age between 18 and 35 (inclusive) per name
ages = [random.randint(18, 35) for _ in names]
people = dict(names=names, ages=ages)

# build the DataFrame from the dictionary: each key becomes a column
df = pd.DataFrame.from_dict(people)
print(df)

In [None]:
# accessing data in DataFrame using indexing by column
# (selects the whole 'ages' column)
print(df['ages'])

In [None]:
# accessing specific column value in data
# One .loc[row_label, column] lookup replaces the chained df['ages'][3] form,
# which indexes twice and is the pattern pandas warns about for assignments.
print(df.loc[3, 'ages'])

In [None]:
# accessing data using indexing by record or row (.loc looks the row up by its index label)
print(df.loc[0])    # gives first row of data

In [None]:
# accessing specific column using indexing by record
# .loc[row_label, column] fetches the value in one step instead of chaining
# a row lookup and a second lookup on the resulting Series.
print(df.loc[0, 'names'])

In [None]:
# age stored in the row labelled 0, fetched with a single .loc[row_label, column] lookup
df.loc[0, 'ages']

In [None]:
# slicing a DataFrame: rows at positions 2, 3 and 4 (the end position is excluded)
df.iloc[2:5]

In [None]:
# a full slice selects every row
df.iloc[:]

In [None]:
# reversing the output: a step of -1 walks the rows from last to first
df.iloc[::-1]

In [None]:
# output last three records of DataFrame (tail(n) returns the final n rows)
print(df.tail(3))

In [None]:
# getting the column headers (the DataFrame's column index)
headers = df.columns
print(headers)

In [None]:
# getting the size of DataFrame as a (rows, columns) tuple
df.shape

In [None]:
# checking general statistics of the DataFrame using .describe();
# it gives a basic summary for the numerical data.
df.describe()

In [None]:
# sorting data on column values
# NOTE: this rebinds `df` to the frame ordered by 'ages', so every later cell sees the sorted rows
df = df.sort_values('ages')
df.head()

In [None]:
# filter using condition: comparing the column yields one True/False value per row
can_drink = df['ages'] > 21
print(can_drink)

In [None]:
# filter values using subsetting: keep only the rows where the condition is True
over_21 = df['ages'] > 21
df.loc[over_21]

In [None]:
# head() with no argument returns the top five records by default
df.head()

In [None]:
# generating new column with data
random.seed(321)   # fixed seed keeps the generated tenures repeatable
# one random tenure between 0 and 10 (inclusive) per row of the DataFrame
tenure = [random.randint(0, 10) for _ in range(len(df))]
df['tenure'] = tenure
df.head()

In [None]:
# using apply() to create new column
def ageGroup(age):
    """Return "Teenager" for ages below 21, otherwise "Adult"."""
    if age < 21:
        return "Teenager"
    return "Adult"
# label every row by passing each age through ageGroup
df['age_group'] = df['ages'].map(ageGroup)
df.head(10)

In [None]:
# groupby() method to summarize statistics: number of entries in each age_group
# (as_index=False keeps 'age_group' as an ordinary column in the result)
df.groupby('age_group', as_index=False).count()

In [None]:
# using mean on groupby()
# numeric_only=True averages just the numeric columns. The text column 'names'
# cannot be averaged, and pandas 2.x raises TypeError for it instead of
# silently dropping it.
df.groupby('age_group', as_index=False).mean(numeric_only=True)

In [None]:
# grouping on two columns at once: counts per (age_group, tenure) combination, first 10 groups shown
df.groupby(['age_group', 'tenure'], as_index=False).count().head(10)

In [None]:
# adding new record in DataFrame under index label 7
# (values are listed in column order: names, ages, tenure, age_group)
df.loc[7] = ['Jai', 35, 5, "Adult"]

In [None]:
# inspect the record stored under index label 7
df.loc[7]

In [None]:
# show up to the first 10 rows
df.head(10)

In [None]:
# remove rows whose 'names' value repeats an earlier row
# (the 'Jai' record stored under label 7 duplicates a name already in the data)
df = df.drop_duplicates(subset='names')

In [None]:
# show up to the first 10 rows to check the current contents of the DataFrame
df.head(10)

## Pandas Join

In [None]:
# Creating a second DataFrame

ratings = {
    "names": ['Amit', 'Billy', 'Jai'],
    "ratings": [10, 9, 6]
}

# from_dict is a constructor on the DataFrame class, so call it on pd.DataFrame
# rather than through the unrelated `df` instance.
rating = pd.DataFrame.from_dict(ratings)
rating.head()

In [None]:
# inner join: keep only the rows whose 'names' value appears in both DataFrames
matched_ratings = df.merge(rating, on='names', how='inner')

In [None]:
# preview the first rows of the inner-join result
matched_ratings.head()

In [None]:
# outer join: keep every 'names' value found in either DataFrame
all_ratings = df.merge(rating, on='names', how='outer')
all_ratings.head()

In [None]:
# load "netflix1.csv" into a DataFrame and preview the first rows
# NOTE(review): the path is relative, so the file presumably sits next to the notebook — confirm
nf = pd.read_csv("netflix1.csv")
nf.head()

In [None]:
# column labels of the loaded data
nf.keys()

In [None]:
# size of the loaded data as a (rows, columns) tuple
nf.shape

In [None]:
# summary statistics for the loaded data
nf.describe()

## Matplotlib

In [None]:
from matplotlib import pyplot as plt

### Line Chart

In [None]:
# line chart: world population (in billions) plotted against the year

x = [1600, 1700, 1800, 1900, 2000]
y = [0.2, 0.5, 1.1, 2.2, 7.7]

plt.plot(x, y)
plt.xlabel("Year")
plt.ylabel("Population(billions)")
plt.title("World Population Over Time")
plt.show()

In [None]:
# line chart with two series drawn on the same axes

x1 = [1600, 1700, 1800, 1900, 2000]
x2 = list(x1)
y1 = [0.2, 0.5, 1.1, 2.2, 7.7]
y2 = [1, 1, 2, 3, 4]

# each entry: x values, y values, format string (colour/marker/line style), legend label
for xs, ys, fmt, series_label in (
    (x1, y1, "rx-", "Actual"),
    (x2, y2, "bo--", "Fake"),
):
    plt.plot(xs, ys, fmt, label=series_label)

plt.xlabel("Year")
plt.ylabel("Population(billions)")
plt.title("World Population Over Time")
plt.legend()
plt.show()

### Bar plot

In [None]:
# bar plot: one bar per movie category, bar height = number of people

category = ['Comedy', 'Action', 'Thriller', 'Romance', 'Horror']
num_of_people = [4, 8, 3, 6, 2]

plt.bar(category, num_of_people)
plt.title("Favorite Movie Category", fontsize=16)
plt.xlabel("Category", fontsize=14)
plt.ylabel("# of People", fontsize=14)
# apply the same font to the tick labels of both axes
for style_ticks in (plt.xticks, plt.yticks):
    style_ticks(fontname="Fantasy")
plt.show()

### Box plot

In [None]:
# box plot comparing the height samples of the two groups

male = [72, 68, 65, 77, 71, 69]
female = [60, 65, 69, 61, 63, 64]
heights = [male, female]

plt.figure(figsize=(15, 8))
plt.boxplot(heights)
# boxes sit at x positions 1 and 2; replace those numbers with group names
plt.xticks([1, 2], ["Male", "Female"])
plt.xlabel("Gender", fontsize=14)
plt.ylabel("Heights(inches)", fontsize=14)
plt.title("Heights by Gender", fontsize=22)
plt.show()

### Scatter plot

In [None]:
# scatter plot of randomly generated height/weight pairs

from random import randint, seed

seed(2)   # fixed seed so the same points are generated on every run
height = [randint(58, 80) for _ in range(20)]
weight = [randint(50, 120) for _ in range(20)]

# weight goes on the x-axis, height on the y-axis
plt.scatter(weight, height)
plt.xlabel("Weight(kgs)")
plt.ylabel("Height(inches)")
plt.title("Height-Weight Distribution")
plt.show()

### Histogram

In [None]:
# histogram of 1000 ages drawn from a normal distribution (mean 40, standard deviation 10)
import numpy as np

np.random.seed(5)   # fixed seed so the same sample is drawn on every run
ages = [np.random.normal(loc=40, scale=10) for _ in range(1000)]

plt.hist(ages, bins=45)  # bins is the number of bars
plt.xlabel("Age")
plt.ylabel("# of People")
plt.title("Ages per population")
plt.show()

In [None]:
# overlaid histograms comparing the central tendency of two age distributions

udaipur = [np.random.normal(loc=60, scale=15) for _ in range(1000)]
ahmedabad = [np.random.normal(loc=35, scale=15) for _ in range(1000)]

plt.figure(figsize=(15,9))
plt.title("Ages per population", fontsize=20)
plt.xlabel("Age", fontsize=14)
plt.ylabel("# of People", fontsize=14)

# Each sample is drawn exactly once. Plotting a sample twice stacks its
# translucent bars and makes that colour look denser than the other.
plt.hist(udaipur, bins=45, color="b", alpha=0.4, label="Udaipur")
plt.hist(ahmedabad, bins=45, color="r", alpha=0.4, label="Ahmedabad")
plt.legend()   # identifies which colour belongs to which sample
plt.show()


## Saving the chart (Matplotlib)

In [None]:
# using savefig method to save chart as a jpg to the local folder

x = [1600, 1700, 1800, 1900, 2000]
y = [0.2, 0.5, 1.1, 2.2, 7.7]

plt.figure(figsize=(12, 8))
plt.plot(x, y, 'bo--')
plt.xlabel("Year")
plt.ylabel("Population(billions)")
plt.title("World Population Over Time")
# write the file first, then display the figure
plt.savefig("population.jpg")
plt.show()


## Flattening Multidimensional Data

In [None]:
# scatter plot with a third dimension (age) encoded as the point colour

height = [randint(58, 78) for _ in range(20)]
weight = [randint(40, 110) for _ in range(20)]
age = [randint(18, 65) for _ in range(20)]

# weight is passed as x and height as y, so the axis labels follow that order
plt.scatter(weight, height, c=age)
plt.title("Height-Weight Distribution", fontsize=20)
plt.xlabel("Weight(kgs)", fontsize=14)
plt.ylabel("Height(inches)", fontsize=14)
plt.colorbar(label="Age")
plt.show()

In [None]:
# three sorted random series (values 1-11), each plotted against the same 1..20 scale
x = sorted(randint(1, 11) for _ in range(20))
y = sorted(randint(1, 11) for _ in range(20))
z = sorted(randint(1, 11) for _ in range(20))
points = list(range(1, 21))

# each entry: series values and the format string (colour/marker/line style) used to draw it
for series, fmt in ((x, "ro--"), (y, "bx-"), (z, "g*-")):
    plt.plot(series, points, fmt)
plt.show()

In [None]:
def createBarPlot(movie, people, movie_name):
    """Draw a bar chart of how many people gave each rating to a movie.

    Args:
        movie: dict mapping a rating value to the number of people who gave it.
        people: accepted so existing callers keep working; not used for drawing.
        movie_name: text shown as the chart title.
    """
    rating = list(movie.keys())
    count = list(movie.values())

    plt.bar(rating, count)
    plt.title(movie_name, fontsize=20)
    plt.xlabel("Rating", fontsize=14)
    plt.ylabel("# of People", fontsize=14)
    # Style the tick labels only after the bars are drawn, so the font is
    # applied to the ticks that the bar chart actually ends up with.
    plt.xticks(fontname="Fantasy")
    plt.show()

def main():
    """Collect movie ratings interactively and chart how often each was given.

    Asks for the movie title, then repeatedly asks for a rating from 1 to 5.
    Input that is not a whole number, or is outside 1-5, is rejected and asked
    again without being counted. After each accepted rating the user is asked
    whether another reviewer follows; "n" or "no" (any letter case, surrounding
    spaces ignored) ends the survey and any other answer continues it.
    The tallies are finally passed to createBarPlot.
    """
    movie = {}      # rating -> number of people who gave that rating
    people = []     # running reviewer numbers: 1, 2, 3, ...
    cnt = 0

    movie_name = input("What movie you are rating? ").title()
    while True:
        try:
            rating = int(input("What would you rate this movie (1-5)? "))
        except ValueError:
            # Non-numeric input used to crash the survey; ask again instead.
            print("Please enter a whole number between 1 and 5.")
            continue
        if not 1 <= rating <= 5:
            print("Please enter a whole number between 1 and 5.")
            continue

        cnt += 1
        people.append(cnt)
        movie[rating] = movie.get(rating, 0) + 1

        answer = input("Is ther another user that would like to review (y/n)? ")
        if answer.strip().lower() in ("n", "no"):
            break

    createBarPlot(movie, people, movie_name)
    
# run the interactive rating survey (it prompts for input when this cell executes)
main()
    

## BeautifulSoup

In [2]:
from bs4 import BeautifulSoup
import requests

# download the page to scrape; the timeout stops a stalled connection
# from hanging the cell indefinitely
page = requests.get("https://arthurleej.com/e-love.html", timeout=10)
print(page)

<Response [200]>


In [None]:
# raw body of the downloaded page, before any parsing
print(page.content)

In [None]:
# parse the downloaded HTML using the "html.parser" backend and print it in indented form
soup = BeautifulSoup(page.content, "html.parser")
print(soup.prettify())

In [None]:
# find() returns the first <b> tag in the parsed document
title = soup.find("b")
print(title)

In [None]:
# get_text() returns only the text inside the tag, without the markup
print(title.get_text())

In [None]:
# collect every <b> tag in the document and print the text of each one
poem_text = soup.find_all("b")
for bold_tag in poem_text:
    print(bold_tag.get_text())

In [None]:
# .children is an iterator over the direct children, so this prints the iterator object itself
print(soup.children)

In [None]:
# show the type of each top-level node in the parsed document
for child in soup.children:
    print(type(child))

In [None]:
# to access the tag object in the children i.e. element.Tag
# NOTE(review): index 2 depends on this page's top-level node order — confirm if the page changes
html= list(soup.children)[2]
# iterating a tag walks its direct children
for section in html:
    print("\n\nStart of new section")
    print(section)



In [None]:
# accessing the head element
# NOTE(review): index 1 depends on this page's node order inside `html` — confirm if the page changes
head = list(html.children)[1]
for item in head:
    print("\n\nNew Tag")
    print(item)

In [None]:
# take the second child of `head` and print its text
# NOTE(review): presumably this is the page's <title> tag — index 1 is page-specific, confirm
title = list(head)[1]
print(title.get_text())

In [9]:
# download the page; the timeout stops a stalled connection from hanging the cell
page2 = requests.get("https://www.york.ac.uk/study/", timeout=10)
soup2 = BeautifulSoup(page2.content, "html.parser")

# Text nodes whose direct parent is one of these tags are skipped.
# Built once, outside the loop, as a set with no duplicate entries.
blacklist = {
    '[document]',
    'noscript',
    'header',
    'html',
    'meta',
    'head',
    'input',
    'script',
    'style',
}

paras = soup2.find_all("p")
for para in paras:
    # string=True matches every text node under the paragraph; it is the
    # current name of the argument formerly spelled text=True.
    text = para.find_all(string=True)

    # each kept text node is followed by a single space, one output line per paragraph
    output = ''.join('{} '.format(t) for t in text if t.parent.name not in blacklist)

    print(output)

Study at York 
Study at York and you'll graduate with more than a qualification. 
You'll be immersed in knowledge and opportunities that will encourage you to grow and succeed. You’ll grapple with big ideas and be encouraged to think differently by academics whose research is changing the world. 
We're proud to announce that we were shortlisted for two prestigious University of the Year awards: 
Find out more about our nominations 
Online degrees at York offer the opportunity to study in a more flexible way. 
Read more 
We offer over 300 courses per year in a diverse range of subjects. 
Read more 
Learn about our massive open online courses (MOOCs), designed by York academics. 
Read more 
From the moment you arrive on campus you'll be welcomed as a member of one of our ten colleges. Through societies, sports clubs, art, music  and  college life there is always something to be part of, both on campus and in the city. 
Take our online campus tour 
Explore student life at York 
Find out a

In [4]:
# download the page; the timeout stops a stalled connection from hanging the cell
page3 = requests.get("https://en.wikipedia.org/wiki/List_of_current_National_Football_League_stadiums", timeout=10)
soup3 = BeautifulSoup(page3.content, "html.parser")

stadium_names = []
# NOTE(review): assumes the first <table> on the page is the stadium list — confirm if the article layout changes
rows = soup3.select("table")[0].find("tbody").find_all("tr")
for row in rows:
    # the header cell of each row (<th scope="row">) holds the stadium name
    for cell in row.find_all("th", scope="row"):
        link = cell.find("a")
        # Store the name as text rather than the tag object. When the cell has
        # no link, use the cell's own text so None never enters the list.
        name_source = link if link is not None else cell
        stadium_names.append(name_source.get_text(strip=True))

print(f"There are total {len(stadium_names)} Football league Stadiums across US.")

There are total 30 Football league Stadiums across US.
