In [1]:
#arrangment of keeping data = data structures
#two main data structures of pandas: series and dataframes
import pandas as pd

### Series

In [2]:
s1 = pd.Series([11,22,33,44,55,66,77,88,99,100])
s1 # Pandas series object

0     11
1     22
2     33
3     44
4     55
5     66
6     77
7     88
8     99
9    100
dtype: int64

In [3]:
# 0 to 9 are default index given by series object
s1[9] # accessing value by index

100

In [4]:
s1[[2,4,7,9,0]] # accessing multiple values

2     33
4     55
7     88
9    100
0     11
dtype: int64

In [5]:
s1[5] = 5555
s1[[2,4,6]] = [333,444,555]
s1

0      11
1      22
2     333
3      44
4     444
5    5555
6     555
7      88
8      99
9     100
dtype: int64

In [6]:
del s1[0] # deleting a value in series

In [7]:
s1

1      22
2     333
3      44
4     444
5    5555
6     555
7      88
8      99
9     100
dtype: int64

In [8]:
s1[99] = 999 # insert in a series with a new index
s1

1       22
2      333
3       44
4      444
5     5555
6      555
7       88
8       99
9      100
99     999
dtype: int64

### Defining our own index:

In [9]:
s2 = pd.Series([10,20,30,40],index = ["oranges","apples","bananas","kiwi"])
s2

oranges    10
apples     20
bananas    30
kiwi       40
dtype: int64

In [10]:
s2.index

Index(['oranges', 'apples', 'bananas', 'kiwi'], dtype='object')

In [11]:
s2.values

array([10, 20, 30, 40], dtype=int64)

In [12]:
ourIndex = ["oranges","apples","bananas","kiwi"]
s2 = pd.Series([10,20,30,40],index = ourIndex)
s2

oranges    10
apples     20
bananas    30
kiwi       40
dtype: int64

In [13]:
s2[s2 < 30]

oranges    10
apples     20
dtype: int64

In [14]:
'peach' in s2 # checking if required key is in series or not

False

In [15]:
'oranges' in s2 # checking if required key is in series or not

True

### Creating a series by using a dictionary

In [16]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
data = pd.Series(sdata)
data

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [17]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
data1 = pd.Series(sdata,index = states)
data1

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [18]:
data1.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [19]:
data1.notnull()

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [20]:
data1['California'] = 34000
data1 

California    34000.0
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

### Dataframes

In [21]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
        'year':  [2000, 2001, 2002, 2001, 2002, 2003],
        'pop':   [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
df1 = pd.DataFrame(data)
df1

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [22]:
mydata = {
    'Name':   ['John', 'Sara', 'Bob', 'William'],
    'Age':    [20,19,22,18],
    'Gender': ['male','female','male', 'male' ]
}
df2 = pd.DataFrame(mydata)
df2

Unnamed: 0,Name,Age,Gender
0,John,20,male
1,Sara,19,female
2,Bob,22,male
3,William,18,male


In [23]:
type(df2)

pandas.core.frame.DataFrame

In [24]:
df2.shape

(4, 3)

In [25]:
df2['Age'] # selecting single columns by column header

0    20
1    19
2    22
3    18
Name: Age, dtype: int64

In [26]:
df2[['Age', 'Gender']] # selecting double columns by column header

Unnamed: 0,Age,Gender
0,20,male
1,19,female
2,22,male
3,18,male


In [27]:
df2.Age

0    20
1    19
2    22
3    18
Name: Age, dtype: int64

In [28]:
del df2['Gender'] # deleting a column 
df2

Unnamed: 0,Name,Age
0,John,20
1,Sara,19
2,Bob,22
3,William,18


In [29]:
df2['Roll no.'] = 0
df2

Unnamed: 0,Name,Age,Roll no.
0,John,20,0
1,Sara,19,0
2,Bob,22,0
3,William,18,0


In [30]:
df2['Roll no.'] = [11,23,45,14]
df2

Unnamed: 0,Name,Age,Roll no.
0,John,20,11
1,Sara,19,23
2,Bob,22,45
3,William,18,14


In [31]:
df2['Roll no.'] = range(4)
df2

Unnamed: 0,Name,Age,Roll no.
0,John,20,0
1,Sara,19,1
2,Bob,22,2
3,William,18,3


In [32]:
df2['Roll no.'] = df2['Age']
df2

Unnamed: 0,Name,Age,Roll no.
0,John,20,20
1,Sara,19,19
2,Bob,22,22
3,William,18,18


In [33]:
dic = {
    "AI For Everyone": [78,98,70,97,98,86,78,69],
    "Python1": [78,67,97,69,50,86,95,75],
    "Python2": [68,97,69,50,76,95,87,69]
}
df = pd.DataFrame(dic, index = ['Nasir', 'Asad', 
                                'Ramsha', 'Mansoor', 
                                'Saba', 'Saba', 
                                'Hina', 'Ali'])
df

Unnamed: 0,AI For Everyone,Python1,Python2
Nasir,78,78,68
Asad,98,67,97
Ramsha,70,97,69
Mansoor,97,69,50
Saba,98,50,76
Saba,86,86,95
Hina,78,95,87
Ali,69,75,69


In [34]:
total = df['AI For Everyone'] + df['Python1'] + df['Python2']
df['Total'] = total
df

Unnamed: 0,AI For Everyone,Python1,Python2,Total
Nasir,78,78,68,224
Asad,98,67,97,262
Ramsha,70,97,69,236
Mansoor,97,69,50,216
Saba,98,50,76,224
Saba,86,86,95,267
Hina,78,95,87,260
Ali,69,75,69,213


In [35]:
df['status'] = (total>250)


In [36]:
numbers = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
evenArr = []
oddArr = []
for num in numbers:
    if num%2 == 0:
        evenArr.append(num)
    else:
        oddArr.append(num)
        
print("Evens : ", evenArr)
print("Odds : ", oddArr)

Evens :  [2, 4, 6, 8, 10, 12, 14]
Odds :  [1, 3, 5, 7, 9, 11, 13, 15]


In [37]:
[num for num in numbers if num%2==0]

[2, 4, 6, 8, 10, 12, 14]

In [55]:
import numpy as np
df['status'] = np.where(((df['AI For Everyone']<70)&(df['Python1']<70)&(df['Python2']<70)),"pass","fail")
df

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status,Percentage,status
Nasir,78,78,68,224,Fail,74.666667,fail
Asad,98,67,97,262,Fail,87.333333,fail
Ramsha,70,97,69,236,Fail,78.666667,fail
Mansoor,97,69,50,216,Fail,72.0,fail
Saba,98,50,76,224,Fail,74.666667,fail
Saba,86,86,95,267,Pass,89.0,fail
Hina,78,95,87,260,Fail,86.666667,fail
Ali,69,75,69,213,Fail,71.0,fail


In [38]:
df['Status']=["Pass" if a >= 80 and p1 >= 80 and p2 >= 80 else "Fail" for a,p1,p2 in zip(df['AI For Everyone'],df['Python1'],df['Python2']) ]
df

Unnamed: 0,AI For Everyone,Python1,Python2,Total,status,Status
Nasir,78,78,68,224,False,Fail
Asad,98,67,97,262,True,Fail
Ramsha,70,97,69,236,False,Fail
Mansoor,97,69,50,216,False,Fail
Saba,98,50,76,224,False,Fail
Saba,86,86,95,267,True,Pass
Hina,78,95,87,260,True,Fail
Ali,69,75,69,213,False,Fail


In [39]:
del df['status']

In [40]:
df

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status
Nasir,78,78,68,224,Fail
Asad,98,67,97,262,Fail
Ramsha,70,97,69,236,Fail
Mansoor,97,69,50,216,Fail
Saba,98,50,76,224,Fail
Saba,86,86,95,267,Pass
Hina,78,95,87,260,Fail
Ali,69,75,69,213,Fail


In [41]:
df["Percentage"] = df["Total"]/300* 100

In [42]:
df

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status,Percentage
Nasir,78,78,68,224,Fail,74.666667
Asad,98,67,97,262,Fail,87.333333
Ramsha,70,97,69,236,Fail,78.666667
Mansoor,97,69,50,216,Fail,72.0
Saba,98,50,76,224,Fail,74.666667
Saba,86,86,95,267,Pass,89.0
Hina,78,95,87,260,Fail,86.666667
Ali,69,75,69,213,Fail,71.0


In [43]:
#head return by default first 5 rows if explicity told it will return that number
df.head()

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status,Percentage
Nasir,78,78,68,224,Fail,74.666667
Asad,98,67,97,262,Fail,87.333333
Ramsha,70,97,69,236,Fail,78.666667
Mansoor,97,69,50,216,Fail,72.0
Saba,98,50,76,224,Fail,74.666667


In [44]:
df.tail()

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status,Percentage
Mansoor,97,69,50,216,Fail,72.0
Saba,98,50,76,224,Fail,74.666667
Saba,86,86,95,267,Pass,89.0
Hina,78,95,87,260,Fail,86.666667
Ali,69,75,69,213,Fail,71.0


In [45]:
df.loc['Mansoor'] # indexing

AI For Everyone      97
Python1              69
Python2              50
Total               216
Status             Fail
Percentage           72
Name: Mansoor, dtype: object

In [46]:
#slicing
df.loc['Nasir':'Hina'] # also adds the last one

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status,Percentage
Nasir,78,78,68,224,Fail,74.666667
Asad,98,67,97,262,Fail,87.333333
Ramsha,70,97,69,236,Fail,78.666667
Mansoor,97,69,50,216,Fail,72.0
Saba,98,50,76,224,Fail,74.666667
Saba,86,86,95,267,Pass,89.0
Hina,78,95,87,260,Fail,86.666667


In [47]:
df.iloc[0]

AI For Everyone         78
Python1                 78
Python2                 68
Total                  224
Status                Fail
Percentage         74.6667
Name: Nasir, dtype: object

In [48]:
df.iloc[0:6]#one includes the last one

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status,Percentage
Nasir,78,78,68,224,Fail,74.666667
Asad,98,67,97,262,Fail,87.333333
Ramsha,70,97,69,236,Fail,78.666667
Mansoor,97,69,50,216,Fail,72.0
Saba,98,50,76,224,Fail,74.666667
Saba,86,86,95,267,Pass,89.0


In [49]:
df.loc['Hina'::-1]

Unnamed: 0,AI For Everyone,Python1,Python2,Total,Status,Percentage
Hina,78,95,87,260,Fail,86.666667
Saba,86,86,95,267,Pass,89.0
Saba,98,50,76,224,Fail,74.666667
Mansoor,97,69,50,216,Fail,72.0
Ramsha,70,97,69,236,Fail,78.666667
Asad,98,67,97,262,Fail,87.333333
Nasir,78,78,68,224,Fail,74.666667


In [50]:
df.loc['Mansoor']['Python2']

50

In [51]:
df.loc['Amjad'] = [1]

ValueError: cannot set a row with mismatched columns