# Covered Topics
- [Numpy](https://numpy.org/)
- [Pandas](https://pandas.pydata.org/docs/)
- Matplotlib

# Numpy

- NumPy arrays are homogeneous: every element shares a single data type (integer, float, etc.), unlike Python lists, which can mix types.
- See the [`ndarray` reference](https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.ndarray.html) for the full list of built-in methods.

In [1]:
import numpy as np

In [2]:
# Build a one-dimensional ndarray from a plain Python list
np.array([1, 2, 3, 4, 5])

array([1, 2, 3, 4, 5])

In [3]:
# Ten zeros stored as integers
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [4]:
# 3-row x 5-column matrix filled with floating-point ones
np.ones(shape=(3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [5]:
# 3x5 matrix in which every element is the same predefined value
np.full(shape=(3, 5), fill_value=1.23)

array([[1.23, 1.23, 1.23, 1.23, 1.23],
       [1.23, 1.23, 1.23, 1.23, 1.23],
       [1.23, 1.23, 1.23, 1.23, 1.23]])

In [6]:
# Evenly stepped sequence: from 0 up to (but not including) 20, in steps of 2
np.arange(start=0, stop=20, step=2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [7]:
# Five evenly spaced values from 0 to 1, with both endpoints included
np.linspace(start=0, stop=1, num=5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [8]:
# 3x3 array of samples drawn from a normal distribution with mean 0 and
# standard deviation 1 (the values change on every run: no seed has been set yet)
np.random.normal(loc=0, scale=1, size=(3, 3))

array([[ 0.57041156, -0.05635949,  1.30579113],
       [-1.59609734, -1.12808653,  0.9797005 ],
       [-0.49545668, -1.2765324 , -0.11741365]])

In [9]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere
np.eye(N=3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [10]:
# Seed NumPy's global random generator so the random arrays below are reproducible
np.random.seed(0)

# One-dimensional array: six random integers drawn from 0 (inclusive) to 10 (exclusive)
x1 = np.random.randint(0, 10, size=6)
x1

array([5, 0, 3, 3, 7, 9])

In [11]:
# Two-dimensional array: 3 rows x 4 columns of random integers from 0 to 9
x2 = np.random.randint(0, 10, size=(3, 4))
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [12]:
# Three-dimensional array: 3 blocks, each 4 rows x 5 columns, of random integers from 0 to 9
x3 = np.random.randint(0, 10, size=(3, 4, 5))
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [13]:
# Report the number of dimensions, the length of each dimension and the
# total element count of x3
for label, value in (
    ("x3 ndim:", x3.ndim),
    ("x3 shape:", x3.shape),
    ("x3 size: ", x3.size),
):
    print(label, value)

x3 ndim: 3
x3 shape: (3, 4, 5)
x3 size:  60


In [14]:
# Median and mean taken over every element of x3, followed by a copy of
# the array converted to nested Python lists
print("x3 median:", np.median(x3))
print("x3 mean:", np.mean(x3))
print("Convert x3 to list: ", x3.tolist())

x3 median: 4.0
x3 mean: 4.383333333333334
Convert x3 to list:  [[[8, 1, 5, 9, 8], [9, 4, 3, 0, 3], [5, 0, 2, 3, 8], [1, 3, 3, 3, 7]], [[0, 1, 9, 9, 0], [4, 7, 3, 2, 7], [2, 0, 0, 4, 5], [5, 6, 8, 4, 1]], [[4, 9, 8, 1, 1], [7, 9, 9, 3, 6], [7, 2, 0, 3, 5], [9, 4, 4, 6, 4]]]


### References

https://www.hackerearth.com/practice/machine-learning/data-manipulation-visualisation-r-python/tutorial-data-manipulation-numpy-pandas-python/tutorial/

# Pandas

In [15]:
import pandas as pd

In [16]:
# Small sample frame: food names (with deliberately inconsistent
# capitalisation) and a weight in ounces for each entry
data = pd.DataFrame(dict(
    food=[
        'Pastrami',
        'bacon',
        'pulled pork',
        'bacon',
        'Pastrami',
        'corned beef',
        'Bacon',
        'pastrami',
        'honey ham',
        'nova lox',
    ],
    ounces=[6, 4, 3, 12, 6, 7.5, 8, 3, 5, 6],
))
data

Unnamed: 0,food,ounces
0,Pastrami,6.0
1,bacon,4.0
2,pulled pork,3.0
3,bacon,12.0
4,Pastrami,6.0
5,corned beef,7.5
6,Bacon,8.0
7,pastrami,3.0
8,honey ham,5.0
9,nova lox,6.0


In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric column
data.describe()

Unnamed: 0,ounces
count,10.0
mean,6.05
std,2.692067
min,3.0
25%,4.25
50%,6.0
75%,7.125
max,12.0


In [18]:
# Sort by food name, breaking ties by ounces. Ascending order and returning a
# new frame (leaving `data` untouched) are the defaults. Capitalised names
# sort ahead of lowercase ones.
data.sort_values(['food', 'ounces'])

Unnamed: 0,food,ounces
6,Bacon,8.0
0,Pastrami,6.0
4,Pastrami,6.0
1,bacon,4.0
3,bacon,12.0
5,corned beef,7.5
8,honey ham,5.0
9,nova lox,6.0
7,pastrami,3.0
2,pulled pork,3.0


In [19]:
# Sort by ounces first, breaking ties by food name. Ascending order and
# returning a new frame (leaving `data` untouched) are the defaults.
data.sort_values(['ounces', 'food'])

Unnamed: 0,food,ounces
7,pastrami,3.0
2,pulled pork,3.0
1,bacon,4.0
8,honey ham,5.0
0,Pastrami,6.0
4,Pastrami,6.0
9,nova lox,6.0
5,corned beef,7.5
6,Bacon,8.0
3,bacon,12.0


In [20]:
# Normalise the food names to lowercase so that differently capitalised
# spellings of the same food compare equal
data['food'] = data['food'].str.lower()
data

Unnamed: 0,food,ounces
0,pastrami,6.0
1,bacon,4.0
2,pulled pork,3.0
3,bacon,12.0
4,pastrami,6.0
5,corned beef,7.5
6,bacon,8.0
7,pastrami,3.0
8,honey ham,5.0
9,nova lox,6.0


In [21]:
# Show the frame with repeated rows removed; the result is not assigned,
# so `data` itself is left unchanged
data.drop_duplicates()

Unnamed: 0,food,ounces
0,pastrami,6.0
1,bacon,4.0
2,pulled pork,3.0
3,bacon,12.0
5,corned beef,7.5
6,bacon,8.0
7,pastrami,3.0
8,honey ham,5.0
9,nova lox,6.0


# Requests

In [22]:
import requests

# Fetch the cat-facts listing over HTTPS. The timeout stops the cell from
# hanging indefinitely if the server never answers.
req = requests.get('https://cat-fact.herokuapp.com/facts', timeout=10)
# Fail with an explicit HTTP error instead of a confusing JSON/KeyError below
req.raise_for_status()

# Show the first entry of the "all" list in the decoded JSON payload
req.json()["all"][0]

{'_id': '5b1b411d841d9700146158d9',
 'text': 'The Egyptian Mau’s name is derived from the Middle Egyptian word mjw, which means cat. But contrary to its name, it’s unclear whether the modern Egyptian Mau actually originated in Egypt.',
 'type': 'cat',
 'user': {'_id': '5a9ac18c7478810ea6c06381',
  'name': {'first': 'Alex', 'last': 'Wohlbruck'}},
 'upvotes': 6,
 'userUpvoted': None}

# Read Twitch Data and Create DataFrame

- https://towardsdatascience.com/creating-a-dataset-using-an-api-with-python-dcc1607616d

In [23]:
import numpy as np
import pandas as pd
import requests
import json

# Endpoint returning the metadata of a single Twitch channel as JSON
url = "https://wind-bow.glitch.me/twitch-api/channels/freecodecamp"
# The timeout stops the cell from hanging indefinitely if the server never answers
JSONContent = requests.get(url, timeout=10).json()
# Re-serialise with indentation and sorted keys so the payload is easy to read
content = json.dumps(JSONContent, indent=4, sort_keys=True)
print(content)

{
    "_id": 79776140,
    "_links": {
        "chat": "https://api.twitch.tv/kraken/chat/freecodecamp",
        "commercial": "https://api.twitch.tv/kraken/channels/freecodecamp/commercial",
        "editors": "https://api.twitch.tv/kraken/channels/freecodecamp/editors",
        "follows": "https://api.twitch.tv/kraken/channels/freecodecamp/follows",
        "self": "https://api.twitch.tv/kraken/channels/freecodecamp",
        "stream_key": "https://api.twitch.tv/kraken/channels/freecodecamp/stream_key",
        "subscriptions": "https://api.twitch.tv/kraken/channels/freecodecamp/subscriptions",
        "teams": "https://api.twitch.tv/kraken/channels/freecodecamp/teams",
        "videos": "https://api.twitch.tv/kraken/channels/freecodecamp/videos"
    },
    "background": null,
    "banner": null,
    "broadcaster_language": "en",
    "created_at": "2015-01-14T03:36:47Z",
    "delay": null,
    "display_name": "FreeCodeCamp",
    "followers": 10122,
    "game": "Creative",
    "langua

In [24]:
# List of channels we want to access
channels = [
    "ESL_SC2", "OgamingSC2", "cretetion", "freecodecamp", "storbeck", "habathcx", "RobotCaleb", "noobs2ninjas",
    "ninja", "shroud", "Dakotaz", "esltv_cs", "pokimane", "tsm_bjergsen", "boxbox", "wtcn", "a_seagull",
    "kinggothalion", "amazhs", "jahrein", "thenadeshot", "sivhd", "kingrichard",
]

channels_list = []
# One shared session reuses the connection across all requests, and the
# timeout stops a single stalled request from hanging the whole cell.
with requests.Session() as session:
    # For each channel, we access its information through its API
    for channel in channels:
        JSONContent = session.get(
            "https://wind-bow.glitch.me/twitch-api/channels/" + channel,
            timeout=10,
        ).json()
        # Responses that carry an 'error' key are skipped
        if 'error' not in JSONContent:
            channels_list.append([
                JSONContent['_id'],
                JSONContent['display_name'],
                JSONContent['status'],
                JSONContent['followers'],
                JSONContent['views'],
            ])

# One row per collected channel; columns keep their default integer labels here
dataset = pd.DataFrame(channels_list)
dataset.head(5)

Unnamed: 0,0,1,2,3,4
0,30220059,ESL_SC2,RERUN: StarCraft 2 - Terminator vs. Parting (P...,135394,60991791
1,71852806,OgamingSC2,UnderDogs - Rediffusion - Qualifier.,40895,20694507
2,90401618,cretetion,It's a Divison kind of Day,908,11631
3,79776140,FreeCodeCamp,Greg working on Electron-Vue boilerplate w/ Ak...,10122,163747
4,86238744,storbeck,,10,1019


In [25]:
# Give the columns descriptive names
dataset.columns = ['Id', 'Name', 'Status', 'Followers', 'Views']

# Discard every row that has at least one missing value, then renumber the
# index from 0 so the dropped rows leave no gaps
dataset = dataset.dropna(axis=0, how='any').reset_index(drop=True)
dataset.head(5)

Unnamed: 0,Id,Name,Status,Followers,Views
0,30220059,ESL_SC2,RERUN: StarCraft 2 - Terminator vs. Parting (P...,135394,60991791
1,71852806,OgamingSC2,UnderDogs - Rediffusion - Qualifier.,40895,20694507
2,90401618,cretetion,It's a Divison kind of Day,908,11631
3,79776140,FreeCodeCamp,Greg working on Electron-Vue boilerplate w/ Ak...,10122,163747
4,6726509,Habathcx,Massively Effective,14,764


In [26]:
# Optional: uncomment the next line to save the cleaned dataset to a CSV file
# dataset.to_csv('twitch.csv', index=False)