# Numpy 3D arrays

In [1]:
import numpy as np

d3 = np.arange(50).reshape(2,5,5)
d3

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

Indexing 3D

In [2]:
d3[1,2,1]

36

In [3]:
d3[1]

array([[25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44],
       [45, 46, 47, 48, 49]])

In [4]:
d3[0:,2:,1:3]

array([[[11, 12],
        [16, 17],
        [21, 22]],

       [[36, 37],
        [41, 42],
        [46, 47]]])

In [5]:
#d3[1:2,4:5,0:5]
d3[1:,4:,:]

array([[[45, 46, 47, 48, 49]]])

In [6]:
### d3[0:,2:4,1:3]

In [7]:
d3[1,:,3:4]

array([[28],
       [33],
       [38],
       [43],
       [48]])

# BroadCasting

In [8]:
x = np.array([[1,2,3]])  #(1,3)
y = np.array([[1],[2],[3]])#(3,1)
x+y

array([[2, 3, 4],
       [3, 4, 5],
       [4, 5, 6]])

In [9]:
x.shape, y.shape

((1, 3), (3, 1))

In [10]:
x,y=np.broadcast_arrays(x,y)
x 

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [11]:
y

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

In [12]:
x+y

array([[2, 3, 4],
       [3, 4, 5],
       [4, 5, 6]])

# Pandas

# Pandas data structure
Series
DataFrame

# Series

In [13]:
import pandas as pd

In [14]:
s = pd.Series([111,222,333,444,555,666,777])
s  # pandas series object

0    111
1    222
2    333
3    444
4    555
5    666
6    777
dtype: int64

In [15]:
# labels/index and values

In [16]:
#getting a single series value

s[4]

555

In [17]:
#getting a single series value
s[[5,0,3,1]]

5    666
0    111
3    444
1    222
dtype: int64

In [18]:
# updating a series value
s[3]= 1000
# updating multiple series value

s[[2,4,0]]=[9999,7777,1111]

In [19]:
s

0    1111
1     222
2    9999
3    1000
4    7777
5     666
6     777
dtype: int64

In [20]:
# deleting a value in series

del s[4]
s

0    1111
1     222
2    9999
3    1000
5     666
6     777
dtype: int64

In [21]:
# deleting multiple values in a series
del s[[0,2,6]]

# not allowed

TypeError: '[0, 2, 6]' is an invalid key

In [None]:
s[1234]= 45676878
s[[123,345,678]]=[23,23,23]
#not allowed

In [None]:
s

In [None]:
s1 = pd.Series([100,200,300,400], index=["apples", "oranges","bananas", "grapes"])

In [None]:
s1

# Creating a series using a dictionary

In [None]:
aDic = {"name":"Nasir Hussain", "Class":"AI", "Time":"5-9"}

In [None]:
aDic

In [None]:
s2 = pd.Series(aDic)
s2

In [None]:
s3 = pd.Series(s1, index=["mango", "oranges","bananas", "grapes"])

In [None]:
s3

In [None]:
s3["mango"]=100

In [None]:
s3

In [None]:
s3.values

In [None]:
s3.index

In [None]:
s3[s3>200]

In [None]:
s3>200

In [None]:
"Mango" in s3

In [None]:
s4= pd.Series(s3.values, index=[22,33,44,55])

In [None]:
s4

In [None]:
s3

In [None]:
s3+s4

# DataFrame

In [None]:
df = pd.DataFrame([123,234,345,456])
df

In [None]:
df = pd.DataFrame(["Red","Green", "Blue"],index=['v1','v2','v3'])
df

In [None]:
df = pd.DataFrame(["Red","Green", "Blue"],index=['v1','v2','v3'],columns=["Colors"])
df

# Creating a Data Frame from a dictionary

In [None]:
score = {"Ai for EO":[76,78,89,90,83,82,87,89,90,99],
         "Python1"  :[67,78,76,75,89,90,54,78,87,45],
         "Python2"  :[78,76,79,56,34,89,45,78,91,89]
        }

In [None]:
df = pd.DataFrame(score,index=["Nasir", "Asad","Ahmed",
                               "Abdullah","Faraz", "Zaid",
                               "Ali", "Khan", 'Faisal', 
                               "Fahad"])
df

In [None]:
df.head(3)

In [None]:
df.tail(3)

In [None]:
df

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
#data = pd.read_csv("banknotes.csv")

In [None]:
df['Python1']

In [None]:
df['Ai for EO']

In [None]:
df.loc[['Abdullah','Fahad']]['Python1']

In [None]:
df

In [None]:
df["extra"]=range(10)
df

In [None]:
del df["extra"]
df

In [None]:
df["Total"]=df["Ai for EO"] + df["Python1"] + df['Python2']
df

In [None]:
# df["Status"]=np.where(df['Total']>200,"Pass","Fail")
# df



In [None]:
df["Status"]=["Pass" if marks>200 else "Fails" for marks in df['Total']]

In [None]:
df

In [None]:
df['RevisedStatus']=np.where(((df['Ai for EO']>=50) & (df["Python1"]>=50) & (df["Python2"]>=50)),"Pass","Fail")

In [None]:
df

In [None]:
df['newStatus']=["Pass" if (ai>=50) & (p1>=50) & (p2>=50) else "Fail" for ai,p1,p2 in zip(df["Ai for EO"], df['Python1'],df["Python2"])]

In [None]:
df

In [None]:
x = [1,2,3,4]
y = ['a','b','c','d']
z = zip(x,y)


In [None]:
for x,y in z:
    print(x,y)

# 13-10-2019

In [None]:
pop = {'Nevada': {2001: 2.4, 2002: 2.9},
       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [None]:
frame3 = pd.DataFrame(pop)

In [None]:
frame3

In [None]:
pdata = {'Ohio': frame3['Ohio'][:-1],
         
         'Nevada': frame3['Nevada'][:2]}
pdata = pd.DataFrame(pdata)
pdata

In [None]:
frame3.index.name = 'year'; frame3.columns.name = 'state'

In [None]:
frame3

In [None]:
frame3.values

# Index Objects

In [None]:
obj = pd.Series(range(3), index=['a', 'b', 'c'])

In [None]:
obj

In [None]:
index = obj.index

In [None]:
index

In [None]:
index[1:]

# 5.2 Essential Functionality

In [None]:
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])

In [None]:
obj

In [None]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])


In [None]:
obj2['e']=100
obj2

In [None]:
bj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0,  4,9])

In [None]:
obj3

In [None]:
obj3.reindex(range(6))

In [None]:
obj3.reindex(range(6), method='ffill')

In [None]:
obj3.reindex(range(6), method='bfill')

In [None]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
                    index=['a', 'c', 'd'],
                    columns=['Ohio', 'Texas', 'California'])
frame

In [None]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2


In [None]:
states = ['Texas', 'Utah', 'California']

In [None]:
frame.reindex(columns=states)

In [None]:
newFrame = frame.reindex(index=['a', 'b', 'c', 'd'],
                        columns=['Texas', 'Utah', 'California',
                                 "NewYork"])
newFrame

In [None]:
newFrame['NewYork']['b']=800

In [None]:
newFrame

In [None]:
frame.loc[['a', 'b', 'c', 'd'], states]

# Dropping Entries from an Axis

In [None]:
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])

In [None]:
obj.drop('c',inplace=True)

In [None]:
obj

In [None]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

In [None]:
data.drop(['Colorado', 'Ohio'])

In [None]:
data

In [None]:
data.drop('two', axis=1)

In [None]:
df = pd.read_csv('data.csv')

In [None]:
df

In [None]:
df.shape

In [None]:
df.columns

In [None]:
df[df['Mid']>20].count()

In [None]:
df[(df['Mid']>20) & (df['Best of Quizzes']>7) & (df['Best of Assignments']>7)]

In [None]:
df.info()

In [None]:
df

In [None]:
df1=df.copy()

In [None]:



df1['Quiz 1'].fillna(df1['Quiz 1'].mean(),inplace=True)

In [None]:
df1

In [None]:
df.mean

# 19-10-2019

In [22]:
frame = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'),
                        index=['Utah', 'Ohio', 'Texas', 'Oregon'])
frame

Unnamed: 0,b,d,e
Utah,-0.012089,-1.200073,-0.153862
Ohio,0.566259,1.158389,-0.315582
Texas,0.122091,-2.14756,0.661842
Oregon,-0.604837,0.295723,1.576296


In [26]:
frame = np.abs(frame)
frame

Unnamed: 0,b,d,e
Utah,0.012089,1.200073,0.153862
Ohio,0.566259,1.158389,0.315582
Texas,0.122091,2.14756,0.661842
Oregon,0.604837,0.295723,1.576296


In [28]:
f = lambda x: x.max() - x.min()
frame.apply(f)

b    0.592748
d    1.851836
e    1.422434
dtype: float64

In [29]:
frame.apply(f, axis='columns')

Utah      1.187984
Ohio      0.842807
Texas     2.025468
Oregon    1.280573
dtype: float64

# Sorting and Ranking

In [30]:
obj = pd.Series(range(4), index=['d', 'a', 'b', 'c'])
obj

d    0
a    1
b    2
c    3
dtype: int64

In [31]:
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [32]:
frame = pd.DataFrame(np.arange(8).reshape((2, 4)),
                    index=['three', 'one'],
                    columns=['d', 'a', 'b', 'c'])
frame


Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [33]:
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [34]:
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [35]:
obj = pd.Series([4, 7, -3, 2])
obj

0    4
1    7
2   -3
3    2
dtype: int64

In [36]:
obj.sort_values()

2   -3
3    2
0    4
1    7
dtype: int64

In [48]:
frame = pd.DataFrame({'b': [4, 7, -3, 2], 'a': [1,2,3,4]})
frame

Unnamed: 0,b,a
0,4,1
1,7,2
2,-3,3
3,2,4


In [49]:
frame.sort_values(by='b')

Unnamed: 0,b,a
2,-3,3
3,2,4
0,4,1
1,7,2


In [50]:
frame

Unnamed: 0,b,a
0,4,1
1,7,2
2,-3,3
3,2,4


In [52]:
frame.sort_values(by=['a', 'b'])

Unnamed: 0,b,a
0,4,1
1,7,2
2,-3,3
3,2,4


In [61]:
obj = pd.Series([7, -5, 7, 4, 2, 4,0, 4,])
obj

0    7
1   -5
2    7
3    4
4    2
5    4
6    0
7    4
dtype: int64

In [62]:
obj.rank()

0    7.5
1    1.0
2    7.5
3    5.0
4    3.0
5    5.0
6    2.0
7    5.0
dtype: float64

In [56]:
df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],
                    [np.nan, np.nan], [0.75, -1.3]],
                        index=['a', 'b', 'c', 'd'],
                            columns=['one', 'two'])
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [57]:
df.sum()

one    9.25
two   -5.80
dtype: float64

In [58]:
df.sum(axis='columns')

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [60]:
df.mean(axis='columns', skipna=True)

a    1.400
b    1.300
c      NaN
d   -0.275
dtype: float64

In [67]:
frame = pd.DataFrame({'b': [1,0], 'a': [0,1]})


In [68]:
frame

Unnamed: 0,b,a
0,1,0
1,0,1


In [65]:
frame.corr()

Unnamed: 0,b,a
b,1.0,-1.0
a,-1.0,1.0


In [66]:
frame.cov()

Unnamed: 0,b,a
b,0.5,-0.5
a,-0.5,0.5


In [69]:
frame

Unnamed: 0,b,a
0,1,0
1,0,1


In [71]:
import pandas_datareader.data as web

In [72]:
all_data = {ticker: web.get_data_yahoo(ticker)
                    for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']}

In [75]:
all_data

{'AAPL':                   High         Low        Open       Close      Volume  \
 Date                                                                     
 2014-10-20   99.959999   98.220001   98.320000   99.760002  77517300.0   
 2014-10-21  103.019997  101.269997  103.019997  102.470001  94623900.0   
 2014-10-22  104.110001  102.599998  102.839996  102.989998  68263100.0   
 2014-10-23  105.050003  103.629997  104.080002  104.830002  71074700.0   
 2014-10-24  105.489998  104.529999  105.180000  105.220001  47053900.0   
 2014-10-27  105.480003  104.699997  104.849998  105.110001  34187700.0   
 2014-10-28  106.739998  105.349998  105.400002  106.739998  48060900.0   
 2014-10-29  107.370003  106.360001  106.650002  107.339996  52687900.0   
 2014-10-30  107.349998  105.900002  106.959999  106.980003  40654800.0   
 2014-10-31  108.040001  107.209999  108.010002  108.000000  44639300.0   
 2014-11-03  110.300003  108.010002  108.220001  109.400002  52282600.0   
 2014-11-04  109.

In [76]:
price = pd.DataFrame({ticker: data['Adj Close'] for ticker, data in all_data.items()})

In [77]:
price

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-10-20,91.539482,136.936264,39.458977,519.413940
2014-10-21,94.026176,132.182739,40.175110,525.098328
2014-10-22,94.503311,131.016617,39.727531,531.251465
2014-10-23,96.191689,131.332458,40.300430,542.490601
2014-10-24,96.549545,131.251480,41.294075,538.302063
2014-10-27,96.448616,131.081451,41.097130,539.289368
2014-10-28,97.944298,132.482391,41.616329,547.397095
2014-10-29,98.494850,132.369003,41.732697,547.825928
2014-10-30,98.164513,133.089706,41.222458,548.803284
2014-10-31,99.100471,133.130219,42.028111,557.549255


In [78]:
volume = pd.DataFrame({ticker: data['Volume']
for ticker, data in all_data.items()})

In [79]:
volume

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-10-20,77517300.0,23416500.0,34527900.0,2607500.0
2014-10-21,94623900.0,20949800.0,36433800.0,2336200.0
2014-10-22,68263100.0,11084800.0,33570900.0,2919200.0
2014-10-23,71074700.0,7599400.0,45451900.0,2348800.0
2014-10-24,47053900.0,6652100.0,61076700.0,1973100.0
2014-10-27,34187700.0,4989100.0,30371300.0,1185300.0
2014-10-28,48060900.0,7895300.0,29049800.0,1270900.0
2014-10-29,52687900.0,4739300.0,30276100.0,1770500.0
2014-10-30,40654800.0,3896000.0,30073900.0,1455600.0
2014-10-31,44639300.0,5818000.0,35849700.0,2035000.0
