# NumPy 3D arrays

In [1]:
import numpy as np

In [2]:
# 50 consecutive integers arranged as 2 blocks of 5 rows x 5 columns
d3 = np.arange(2 * 5 * 5).reshape((2, 5, 5))
d3

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

In [3]:
# Indexing 3D

In [4]:
# Single element: block 1, row 2, column 1
d3[1, 2, 1]

36

In [5]:
# Slicing 3D

In [6]:
# Block 0 only: rows 2-3, columns 1-2 (the result is 2-D)
d3[0, 2:4, 1:3]

array([[11, 12],
       [16, 17]])

In [7]:
# Slices keep every axis: last block, last row, all columns (trailing axis left implicit)
d3[1:, 4:]

array([[[45, 46, 47, 48, 49]]])

In [8]:
# Every block: rows 2-3, columns 1-2
d3[:, 2:4, 1:3]

array([[[11, 12],
        [16, 17]],

       [[36, 37],
        [41, 42]]])

In [9]:
# Last block, all rows, column 3 kept as a length-1 axis
d3[1:, :, 3:4]

array([[[28],
        [33],
        [38],
        [43],
        [48]]])

# Broadcasting in NumPy

In [10]:
# A (1, 3) row vector and a (3, 1) column vector holding the same values
x = np.arange(1, 4).reshape(1, 3)
y = np.arange(1, 4).reshape(3, 1)

x.shape, y.shape

((1, 3), (3, 1))

In [11]:
# Expand both operands to their common broadcast shape and rebind the names to the results
broadcast_pair = np.broadcast_arrays(x, y)
x, y = broadcast_pair

In [12]:
# Row vector + column vector: broadcasting produces the full 3x3 table of sums
a = np.array([1, 2, 3])[np.newaxis, :]
b = np.array([1, 2, 3])[:, np.newaxis]

a + b


array([[2, 3, 4],
       [3, 4, 5],
       [4, 5, 6]])

In [13]:
# x after np.broadcast_arrays: the original row repeated on every row
x

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [14]:
# y after np.broadcast_arrays: the original column repeated in every column
y

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

# PANDAS

pandas has two main data structure types:
- Series
- DataFrame

## Series

In [62]:
import pandas as pd
from pandas import Series, DataFrame

In [16]:
# Seven multiples of 111 with the default 0..6 integer index
s = pd.Series([111 * k for k in range(1, 8)])
s

0    111
1    222
2    333
3    444
4    555
5    666
6    777
dtype: int64

A Series consists of an index (labels) and the values associated with those labels.

In [17]:
# Look up a single value by its index label
s.loc[4]

555

In [18]:
# Look up several values at once; the result follows the order of the requested labels
wanted_labels = [5, 0, 3, 1]
s.loc[wanted_labels]

5    666
0    111
3    444
1    222
dtype: int64

In [19]:
# Overwrite the value stored under label 3, then read it back
s.loc[3] = 10000
s.loc[3]

10000

In [20]:
# Updating multiple values in a Series: one new value per listed label
s.loc[[2, 4, 0]] = [999, 1112, 332]
s

0      332
1      222
2      999
3    10000
4     1112
5      666
6      777
dtype: int64

In [21]:
# Delete the entry labelled 4 from the Series in place
del s[4]
s

# `del` removes one label per statement; to remove several labels at once use s.drop([...]) instead

0      332
1      222
2      999
3    10000
5      666
6      777
dtype: int64

In [24]:
# Assigning to a label that is not yet in the index (7) appends a new entry
s[7] = 989898

# Label assignment adds one entry per statement; several new entries can be appended with pd.concat
s

0       332
1       222
2       999
3     10000
5       666
6       777
7    989898
dtype: int64

In [26]:
# Series with string labels "a".."d" instead of the default integer index
sl = pd.Series([1, 3, 4, 6], index=list("abcd"))
sl

a    1
b    3
c    4
d    6
dtype: int64

In [28]:
# Series built from a dict: the keys become the index labels, the values become the data
aDic = dict([("name", "azfar"), ("class", "ai"), ("time", "5 to 9")])
s2 = pd.Series(aDic)
s2

name      azfar
class        ai
time     5 to 9
dtype: object

In [30]:
# Build a new Series from sl aligned to a different set of labels:
# labels present in sl keep their value, the unknown label "e" becomes NaN
s3 = sl.reindex(["e", "b", "c", "d"])
s3

e    NaN
b    3.0
c    4.0
d    6.0
dtype: float64

In [31]:
s3.index  # the index (labels) of the Series

Index(['e', 'b', 'c', 'd'], dtype='object')

In [32]:
# Boolean mask: keep only the entries whose value exceeds 3 (NaN compares False and is dropped)
s3.loc[s3.gt(3)]


c    4.0
d    6.0
dtype: float64

In [36]:
# Membership on a Series tests the index labels, not the values
"c" in s3.index

True

In [37]:
# Flag the missing (NaN) entries
s3.isnull()

e     True
b    False
c    False
d    False
dtype: bool

In [38]:
# Flag the entries that hold a value (the inverse of the null mask)
s3.notnull()

e    False
b     True
c     True
d     True
dtype: bool

In [47]:
# s4 is defined here so this cell also runs on a fresh kernel;
# originally it was only defined in the cell that follows, which raised NameError on Run All
s4 = Series([7, 8], index=["b", "e"])

s3, s4

(e    NaN
 b    3.0
 c    4.0
 d    6.0
 dtype: float64, b    7
 e    8
 dtype: int64)

In [44]:
# Addition aligns on index labels; a label missing from one operand, or holding NaN, yields NaN
s4 = Series({"b": 7, "e": 8})

s4.add(s3)

b    10.0
c     NaN
d     NaN
e     NaN
dtype: float64

# DATAFRAME

In [48]:
# One unnamed column (labelled 0) built from a flat list of values
df = DataFrame(list(range(12, 16)))
df

Unnamed: 0,0
0,12
1,13
2,14
3,15


In [53]:
# Same frame described column by column, with explicit row labels
df2 = DataFrame(
    {
        "color": ["red", "gree", "blue"],
        "azfar": ["a", "b", "c"],
    },
    index=["v1", "v2", "v3"],
)
df2

Unnamed: 0,color,azfar
v1,red,a
v2,gree,b
v3,blue,c


In [139]:
# DataFrame from a dict: each key is a column (one subject), each list holds one score per student
students = ["azfar", "ali", "fashi", "syed", "momiz"]
score = dict([
    ("AI for E1", [76, 78, 79, 80, 89]),
    ("Python1", [76, 78, 66, 87, 91]),
    ("Python2", [76, 77, 66, 41, 51]),
])

score_df = DataFrame(score, index=students)

score_df

Unnamed: 0,AI for E1,Python1,Python2
azfar,76,76,76
ali,78,78,77
fashi,79,66,66
syed,80,87,41
momiz,89,91,51


In [140]:
# Structural summary: index range, column names, non-null counts, dtypes and memory usage
score_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, azfar to momiz
Data columns (total 3 columns):
AI for E1    5 non-null int64
Python1      5 non-null int64
Python2      5 non-null int64
dtypes: int64(3)
memory usage: 160.0+ bytes


In [141]:
# Per-column summary statistics: count, mean, std, min, quartiles and max
score_df.describe()

Unnamed: 0,AI for E1,Python1,Python2
count,5.0,5.0,5.0
mean,80.4,79.6,62.2
std,5.029911,9.813256,15.801899
min,76.0,66.0,41.0
25%,78.0,76.0,51.0
50%,79.0,78.0,66.0
75%,80.0,87.0,76.0
max,89.0,91.0,77.0


In [142]:
# Work on an independent copy: a plain assignment would only alias score_df,
# so every column added to df1 afterwards would silently appear on score_df too
df1 = score_df.copy()

In [143]:
# Select one column as a Series (bracket access also works for names that are not valid identifiers)
df1["Python1"]

azfar    76
ali      78
fashi    66
syed     87
momiz    91
Name: Python1, dtype: int64

In [144]:
# Select several columns: all rows, two named columns
df1.loc[:, ['AI for E1', 'Python1']]

Unnamed: 0,AI for E1,Python1
azfar,76,76
ali,78,78
fashi,79,66
syed,80,87
momiz,89,91


In [145]:
# Select rows and columns by label in a single .loc call
df1.loc[['azfar', 'fashi'], ['Python1', 'AI for E1']]

Unnamed: 0,Python1,AI for E1
azfar,76,76
fashi,66,79


In [146]:
# Average of the three subject scores for each student
subject_scores = df1[['Python1', 'AI for E1', 'Python2']]
df1["percentage"] = subject_scores.sum(axis=1) / 3
df1

Unnamed: 0,AI for E1,Python1,Python2,percentage
azfar,76,76,76,76.0
ali,78,78,77,77.666667
fashi,79,66,66,70.333333
syed,80,87,41,69.333333
momiz,89,91,51,77.0


In [147]:
# Rows whose percentage exceeds 75, restricted to the score columns
above_75 = df1["percentage"] > 75
df1.loc[above_75, ["AI for E1", "Python1", "Python2", "percentage"]]

Unnamed: 0,AI for E1,Python1,Python2,percentage
azfar,76,76,76,76.0
ali,78,78,77,77.666667
momiz,89,91,51,77.0


In [148]:
# Show the full frame again, including the added percentage column
df1

Unnamed: 0,AI for E1,Python1,Python2,percentage
azfar,76,76,76,76.0
ali,78,78,77,77.666667
fashi,79,66,66,70.333333
syed,80,87,41,69.333333
momiz,89,91,51,77.0


# Load the invoice workbook and keep only the columns of interest.
# The path is a raw string so the Windows backslashes are not parsed as escape sequences.
# The result gets its own name: rebinding df1 here would replace the score frame
# that the cells below still rely on (they read 'Python1', 'AI for E1', 'Python2').
# TODO: replace this machine-specific absolute path with a path relative to a configurable data directory.
raw_data = pd.read_excel(r"D:\Azfar\Personal\Programming_Projects\PIAIC\Quarter2\Class Notes\data\Spencer_Hoy_Invoice_Details.xlsx")
invoice_df = DataFrame(raw_data[['DocumentId', 'UploadDate', 'DocumentSource','ProcessingStatus', 'DocumentSubType','InvoiceNo']])
invoice_df.head()

In [152]:
# Row-wise sum of the three subject scores
df1["total"] = df1[['Python1', 'AI for E1', 'Python2']].sum(axis=1)
df1

Unnamed: 0,AI for E1,Python1,Python2,percentage,total
azfar,76,76,76,76.0,228
ali,78,78,77,77.666667,233
fashi,79,66,66,70.333333,211
syed,80,87,41,69.333333,208
momiz,89,91,51,77.0,231


In [157]:
# Compare the 'total' column as a Series (single brackets) so a boolean Series is assigned;
# double brackets would produce a one-column DataFrame on the right-hand side
df1['PassStatus'] = df1['total'] > 210
df1

Unnamed: 0,AI for E1,Python1,Python2,percentage,total,PassStatus
azfar,76,76,76,76.0,228,True
ali,78,78,77,77.666667,233,True
fashi,79,66,66,70.333333,211,True
syed,80,87,41,69.333333,208,False
momiz,89,91,51,77.0,231,True


In [164]:
# Grade bands checked from highest to lowest; the first matching condition wins,
# and anything that matches none of them is marked 'Fail'
grade_conditions = [
    df1['percentage'] > 80,
    df1['percentage'] > 70,
    df1['percentage'] > 60,
]
grade_labels = ['A+', 'A', 'B']
df1['grade'] = np.select(grade_conditions, grade_labels, default='Fail')
df1

Unnamed: 0,AI for E1,Python1,Python2,percentage,total,PassStatus,grade
azfar,76,76,76,76.0,228,True,A
ali,78,78,77,77.666667,233,True,A
fashi,79,66,66,70.333333,211,True,A
syed,80,87,41,69.333333,208,False,B
momiz,89,91,51,77.0,231,True,A


In [167]:
# Two-way label on the total score: above 220 is "UpperClass", everything else "LowerClass"
df1['PassStatus2'] = np.where(df1['total'] > 220, "UpperClass", "LowerClass")
df1

Unnamed: 0,AI for E1,Python1,Python2,percentage,total,PassStatus,grade,PassStatus2
azfar,76,76,76,76.0,228,True,A,UpperClass
ali,78,78,77,77.666667,233,True,A,UpperClass
fashi,79,66,66,70.333333,211,True,A,LowerClass
syed,80,87,41,69.333333,208,False,B,LowerClass
momiz,89,91,51,77.0,231,True,A,UpperClass


In [169]:
# 'All Pass' only when every one of the three subjects scored at least 70
subject_columns = ['AI for E1', 'Python1', 'Python2']
passed_every_subject = (df1[subject_columns] >= 70).all(axis=1)
df1['RevisedStatus'] = np.where(passed_every_subject, 'All Pass', 'Fail')
df1

Unnamed: 0,AI for E1,Python1,Python2,percentage,total,PassStatus,grade,PassStatus2,RevisedStatus
azfar,76,76,76,76.0,228,True,A,UpperClass,All Pass
ali,78,78,77,77.666667,233,True,A,UpperClass,All Pass
fashi,79,66,66,70.333333,211,True,A,LowerClass,Fail
syed,80,87,41,69.333333,208,False,B,LowerClass,Fail
momiz,89,91,51,77.0,231,True,A,UpperClass,Fail


In [174]:
# Same rule as the RevisedStatus column, written as a list comprehension over the three
# score columns. Uses `and` (logical, short-circuiting) rather than bitwise `&` on scalars,
# and `>= 70` so that a score of exactly 70 is treated the same way as in RevisedStatus.
df1['newStatus'] = [
    "All Pass" if (ai >= 70 and p1 >= 70 and p2 >= 70) else "Fail"
    for ai, p1, p2 in zip(df1["AI for E1"], df1["Python1"], df1["Python2"])
]

df1

Unnamed: 0,AI for E1,Python1,Python2,percentage,total,PassStatus,grade,PassStatus2,RevisedStatus,newStatus
azfar,76,76,76,76.0,228,True,A,UpperClass,All Pass,All Pass
ali,78,78,77,77.666667,233,True,A,UpperClass,All Pass,All Pass
fashi,79,66,66,70.333333,211,True,A,LowerClass,Fail,Fail
syed,80,87,41,69.333333,208,False,B,LowerClass,Fail,Fail
momiz,89,91,51,77.0,231,True,A,UpperClass,Fail,Fail


In [176]:
# zip pairs up the elements of the two lists position by position
x = [1, 2]
y = [1, 2]

z = zip(x, y)

# Distinct loop names: reusing x and y here would overwrite the lists with their last elements
for x_item, y_item in z:
    print(x_item, y_item)

1 1
2 2
