# Python External Libraries

In [None]:
## I will cover the most important libraries that will help you 
## in the AI/Machine Learning/Neural network course

## The cheat sheets are available at this link:
## https://www.datacamp.com/community/data-science-cheatsheets?page=3

## We will cover 
## Numpy
## Pandas
## Matplotlib

# Numpy library

In [None]:
## import the library numpy and use np as shortcut for it
import numpy as np

## Build arrays from (nested) Python lists
a = np.array([1, 2, 3])  # a flat list gives a 1D array
print(a, type(a), sep='\n')  # the values, then the type: numpy.ndarray (n-dimensional array)

b = np.array([[1, 2, 3],
              [4, 5, 6]])  # a list of lists gives a 2D array (2 rows, 3 columns)
print(b)




In [None]:
## A list and an array react differently to "+ 2"
my_list = [1, 2, 3]
# print(my_list + 2)  # TypeError: a list can only be concatenated with another list
a = np.array([1, 2, 3])
print(np.add(a, 2))  # the array adds 2 to every element

In [None]:
## Initial placeholders

## 2D array of zeros; the shape tuple is (rows, columns) -> 3 rows, 4 columns
print(np.zeros(shape=(3, 4)))

## 2D array of ones with the same shape
print(np.ones(shape=(3, 4)))

## evenly spaced values using arange(start, stop, step); stop itself is excluded
print(np.arange(10, 25, 5))

## evenly spaced values using linspace(start, stop, num); num is the number of samples
print(np.linspace(0, 2, num=9))

## constant array: 3 by 4, every entry equal to 1
print(np.full((3, 4), fill_value=1))

## n by n identity matrix (np.identity(n) gives the same result as np.eye(n))
print(np.identity(3))

In [None]:
## Input/Output

## write one array to disk in NumPy .npy format
## (the .npy extension is appended because the name has none)
a = np.array([1, 2, 3])
np.save(file='my_array', arr=a)

## write several arrays into a single uncompressed .npz file;
## each keyword name becomes the key used to look the array up after loading
b = np.array([4, 5, 6])
np.savez('my_multiple_array.npz', a=a, b=b)

In [None]:
## Load the single array stored in a .npy file. The file name carries the
## .npy extension that np.save appends when the given name has none.
np.load('my_array.npy')

In [None]:
## Load a .npz archive. np.load returns a lazy NpzFile that keeps the file
## open, so use it as a context manager to make sure the file gets closed.
with np.load('my_multiple_array.npz') as d:
    print(d['a'])  # arrays are looked up by the keyword names given to np.savez
    print(d['b'])

In [None]:
## read a whitespace-separated text file into an array
my_array = np.loadtxt(fname="myarrayfile.txt")
print(my_array, my_array[0], sep='\n')  # the whole array, then its first entry

## read a comma-separated (csv) file into an array
my_array = np.genfromtxt(fname="mycsvfile.csv", delimiter=',')
print(my_array)

In [None]:
## Inspecting array
b = np.array([[1, 2, 3], [4, 5, 6]])

## the shape attribute is a tuple: (number of rows, number of columns)
n_rows, n_cols = b.shape
print(b.shape)
print(n_rows)  # number of rows
print(n_cols)  # number of columns

## len(array) counts along the first axis only (used for 1D arrays mostly)
print(len(b))     # number of rows
print(len(b[1]))  # length of one row = number of columns

## ndim is the number of dimensions (axes) of the array
print(b.ndim)

In [None]:
## Array Mathematics
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

## every arithmetic operator has a function twin:
## + / np.add, - / np.subtract, * / np.multiply, / / np.divide
g1, g2 = a + b, np.add(a, b)
print(g1, g2, sep='\n')

g3, g4 = a / b, np.divide(a, b)
print(g3, g4, sep='\n')

## Exponentiation: e raised to each element (an elementwise operation)
print(np.exp(a))

## Dot product (np.dot(a, b) is the function form of a.dot(b))
print(np.dot(a, b))

In [None]:
## Comparison: the result is a boolean array, one True/False per element
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(np.greater(a, b))  # same as a > b, compared element by element

print(np.less(a, 2))  # same as a < 2, every element against the scalar 2

In [None]:
## Aggregate Functions
b = np.array([[1, 2, 3], [4, 5, 6]])

## no axis: add up every element of the array
print(b.sum())

## axis=0: add down the rows, giving one total per column
print(b.sum(axis=0))

## axis=1: add across the columns, giving one total per row
print(b.sum(axis=1))

## min, max and mean (and np.median, np.sort) accept an axis argument in the same way

In [None]:
## view() creates a second array object that looks at the same data as b
b = np.array([[1, 2, 3], [4, 5, 6]])
h = b.view()
print(b, h, sep='\n')  # both print the same values

In [None]:
## Access the elements of the array with indexing
b = np.array([[1, 2, 3], [4, 5, 6]])
print(b[0, 2])  ## array[row, column] -> first row, third column

## Change an element of the array
b[0, 0] = 10
print(b)

## Slicing
b = np.array([[1, 2, 3], [4, 5, 6]])
print(b[0:2, 1])  # rows 0 and 1 (the stop index 2 is excluded), second column

## Boolean indexing (returns a 1D array of all elements that meet the condition)
b = b[b < 5]
print(b)  # show the filtered result, otherwise this step displays nothing

In [None]:
## Array Manipulation
b = np.array([[1, 2, 3], [4, 5, 6]])
print(b)
print(b.shape)

## Transposing array (rows become columns, so the shape (2, 3) becomes (3, 2))
b_T = np.transpose(b)
print(b_T)
print(b_T.shape)


## Change array shape

## Flatten the array (make it 1D) using ravel
print(b.ravel())

## reshape returns an array with the new shape and leaves b's own shape untouched
b = np.array([[1, 2, 3], [4, 5, 6]])
c = b.reshape(3, 2)
print(b)
print(c)

## Adding/Removing elements

## ndarray.resize changes the shape of the original array in place and
## returns None, so its result must not be assigned to a variable
b = np.array([[1, 2, 3], [4, 5, 6]])
b.resize(3, 2)
print(b)

## np.append returns a new array (b is not modified); without an axis
## both inputs are flattened, so the result is 1D
b = np.array([[1, 2, 3], [4, 5, 6]])
b_flat_appended = np.append(b, [7, 8, 9])
print(b_flat_appended)


## Appending on a specific axis requires both arrays to have the same
## number of dimensions and the same size in every dimension except that axis

## append items to the array on axis 0 (rows)
b = np.array([[1, 2, 3], [4, 5, 6]])
c = b*2
print(np.append(b, c, axis=0))  ## add the new array as extra rows

## append items to the array on axis 1 (columns)
b = np.array([[1, 2, 3], [4, 5, 6]])
c = b*2
print(np.append(b, c, axis=1))  ## add the new array as extra columns

In [None]:
## Splitting array
b = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

## An integer second argument is the number of equal-width pieces to cut.
## Splitting along axis 1 (the columns) is what np.hsplit(b, 2) does:
## the 4 columns are divided into two blocks of 2 columns each.
l, r = np.split(b, 2, axis=1)
print(l)
print(r)

# u,l = np.vsplit(b,2) # gives error: 3 rows cannot be divided into 2 equal parts

In [None]:
## Broadcasting: the single row of a (shape (1, 4)) is stretched across the
## three rows of b (shape (3, 4)) before the element-wise addition
a = np.array([[1, 2, 3, 4]])
b = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])
np.add(a, b)

# Pandas

In [None]:
## Pandas cheat sheet 
## https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf

In [None]:
## Import pandas 
import pandas as pd
import numpy as np

In [None]:
## one dictionary entry per column: key = column name, value = column data
column_data = {"a": [4, 5, 6], "b": [7, 8, 9], "c": [10, 11, 12]}

## default row labels 0, 1, 2
df = pd.DataFrame(column_data)
print(df, end='\n\n')

## same columns with explicit row labels 1, 2, 3
df = pd.DataFrame(column_data, index=[1, 2, 3])
print(df, end='\n\n')

## built from a list of rows, with row labels and column names given separately
row_data = [[4, 5, 6], [7, 8, 9], [10, 11, 12]]
df = pd.DataFrame(row_data, index=[1, 2, 3], columns=['a', 'b', 'c'])

print(df)

In [None]:
## To read an Excel file use this
#df = pd.read_excel(r'C:/Users/jalan/Desktop/my_data.xlsx')

## To read a CSV file: the location is kept in one variable so it is easy to change
csv_path = 'C:/Users/jalan/Desktop/adult.csv'
df = pd.read_csv(csv_path)
#(df.head()) ## show the first 5 rows

#df.tail(10) ## show the last 10 rows

In [None]:
## dtypes lists the data type of every column of df
df.dtypes

In [None]:
## index holds the row labels of df
df.index

In [None]:
## columns holds the column names of df
df.columns

In [None]:
## values returns the data of df as a NumPy array, without the row and column labels
df.values

In [None]:
## Statistical summary (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
## sort by using any column; sort_values returns a new, sorted DataFrame
## and leaves df itself unchanged
age_ascending = df.sort_values('Age', ascending=True)    # lowest to highest
age_descending = df.sort_values('Age', ascending=False)  # highest to lowest

## Only the last expression of a cell is displayed automatically, so the
## ascending result is printed explicitly (first rows only)
print(age_ascending.head())
age_descending



In [None]:
## show the first 5 rows of df
df.head()

In [None]:
## Slice (uncomment one example at a time to try it)

# ## select one column: Age
# print(df['Age'])

# ## select two columns: Age and Workclass (note the list inside the brackets)
# print(df[['Age','Workclass']])

## select a range of rows by index label (.loc includes both end labels)
# print(df.loc[2:4])

# ## select a range of rows and specific columns
#print(df.loc[2:4,['Age','Workclass']])

# ## select a single row (label 4) and specific columns
print(df.loc[4,['Age','Workclass']])




In [None]:
## filtering with a boolean condition: keep only the rows where Age is above 40
df.loc[df['Age'] > 40]

In [None]:
## filtering with isin: keep only the rows whose Age is one of the listed values

age_is_listed = df['Age'].isin([40])  # boolean Series, True where Age equals 40
df[age_is_listed]

In [None]:
## Change elements
print(df.loc[0, ['Age']])  # value before the change

## assign a new value to row label 0, column Age
df.loc[0, ['Age']] = 60

## a bare expression in the middle of a cell is not displayed, so print it
print(df.loc[0, ['Age']])  # value after the change


## add a new column computed from an existing one
df['new column'] = df.Age + 5
df.head()  # preview the first rows instead of dumping the whole frame

In [None]:
## Rename columns
## rename returns a new DataFrame, which is assigned back to df here, so
## from this point on the column is called 'age' and no longer 'Age'

df = df.rename(columns = {'Age':'age'} )
df.head()

## To change all the column names at once use
#df.columns = ['name1','name2'..]

In [None]:
## for loop over the rows: iterrows yields (index label, row) pairs,
## and a single column of the row is read with row['column name']
for index, row in df.iterrows():
    print(index, row['Education'])

In [None]:
## write df to a csv file; a bare file name is resolved against the current working directory
df.to_csv(path_or_buf='name of the file.csv')

## to write the csv file somewhere else, give a full path instead
#df.to_csv(r'Path where you want to store the exported excel file\File Name.csv')

## to write an xlsx file use to_excel
#df.to_excel(r'Path where you want to store the exported excel file\File Name.xlsx')


# Matplotlib

In [None]:
## Matplotlib is a python 2D plotting library 

In [None]:
import matplotlib.pyplot as plt # Import pyplot module from the matplotlib package and use plt as shortcut to call it
import numpy as np # Always import numpy when importing matplotlib

In [None]:
## Plot two lists (x values first, then the matching y values)
plt.plot([0, 1, 2, 3, 4, 5], [0, 1, 4, 9, 16, 25])
## axis limits are [xmin, xmax, ymin, ymax]; ymax must be above 25,
## otherwise the last point (5, 25) falls outside the visible area
plt.axis([0, 6, 0, 30])
plt.xlabel('x')  # x-axis label
plt.ylabel('y')  # y-axis label
plt.title('y = x^2')
plt.show()  # display the finished figure

In [None]:
x = np.linspace(0, 3, num=10)  # 10 evenly spaced points from 0 to 3 (both ends included)
y, y2, y3 = np.cos(x), np.sin(x), np.exp(x)  # each function evaluated at every point of x

In [None]:
## show the values stored in x
print(x)

In [None]:
## Figure 1: scatter plot (individual markers, no connecting line)
plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.title('cos(x)')
plt.show()  ## Display the plot (if you remove the line, the next plot will show with this one)

## Figure 2: line plot; 'k--' means black (k) dashed (--) line
plt.plot(x, y2, 'k--')
plt.xlabel('x')
plt.ylabel('y')
plt.title('sin(x)')
plt.show()

## Figure 3: line plot; 'r' means red
plt.plot(x, y3, 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.title('e^(x)')
plt.show()  # finish the last figure the same way as the first two



In [None]:
import random # import random package (not used below: the samples come from NumPy's random module)
mean = 100
SD = 15
SEED = 42  # fixed seed so every run of the notebook draws the same samples

## default_rng(SEED) creates a random number generator with a known starting state.
## rng.standard_normal(n) generates an array with length n, filled with random
## floating-point samples from the standard normal distribution (mean 0, SD 1).
## Multiplying by SD and adding mean rescales them to mean 100 and SD 15.
rng = np.random.default_rng(SEED)
x = mean + SD * rng.standard_normal(10000)

In [None]:
## histogram of x using 40 bins, drawn with green ('g') bars
plt.hist(x, bins=40, facecolor='g')

In [None]:
## Create subplots
x = np.linspace(0, 3, 10)
y = np.cos(x)
y2 = np.sin(x)
y3 = np.exp(x)
## log2 is not defined at 0 (np.log2(0) gives -inf and a RuntimeWarning),
## so it is evaluated and plotted only where x is positive
x_pos = x[x > 0]
y4 = np.log2(x_pos)

fig, axs = plt.subplots(2, 2) ## define how many subplots (2 by 2) means 4 plots

## axs[row, column] selects one panel; each panel plots the curve named in its title
axs[0, 0].plot(x, y)
axs[0, 0].set_title('cos(x)')
axs[0, 1].plot(x, y2, 'tab:orange')
axs[0, 1].set_title('sin(x)')
axs[1, 0].plot(x, y3, 'tab:green')
axs[1, 0].set_title('exp(x)')
axs[1, 1].plot(x_pos, y4, 'tab:red')
axs[1, 1].set_title('log2(x)')

fig.tight_layout()  # add spacing so titles do not overlap the panels above them
plt.show()

In [None]:
## Use this link to practice more 
## https://www.w3resource.com/python-exercises/