###### Create an Empty Series

In [1]:
import pandas as pd

In [2]:
s = pd.Series()
s

Series([], dtype: float64)

###### Create a Series from ndarray
If data is an ndarray, then index passed must be of the same length. If no index is passed, then by default index will be range(n) where n is array length, i.e., [0,1,2,3…. range(len(array))-1].

In [3]:
import numpy as np
data = np.array(['a','b','c','d'])
ser1 = pd.Series(data)
print(ser1)

0    a
1    b
2    c
3    d
dtype: object


In [15]:
data = np.array(['a','b','c','d'])
ser2 = pd.Series(data,index=[101,102,103,104]) #manually passing the index of the array
ser2

101    a
102    b
103    c
104    d
dtype: object

###### Create a series from Dictionary

In [18]:
dic_a = {"a":1,"b":2,"c":3,"d":4}
ser3 = pd.Series(dic_a)
ser3

a    1
b    2
c    3
d    4
dtype: int64

In [59]:
ser4 = pd.Series(dic_a,index=["p","q","r","a"])
ser4

p    NaN
q    NaN
r    NaN
a    1.0
dtype: float64

In [63]:
ser4[np.isnan(ser4)] #numpy has isnan function to identify Not a Number values

p   NaN
q   NaN
r   NaN
dtype: float64

###### Create a Series from Scalar
If data is a scalar value, an index must be provided. The value will be repeated to match the length of index

In [24]:
ser5 = pd.Series(5,index=[0,1])
ser5

0    5
1    5
dtype: int64

###### Accessing Data from Series with Positions

In [27]:
ser3

a    1
b    2
c    3
d    4
dtype: int64

In [30]:
ser3[0] #retrieve the first element 

1

In [40]:
ser3[:3]# retrieve the first three elements from the series

a    1
b    2
c    3
dtype: int64

In [41]:
ser3[:-3]

a    1
dtype: int64

In [39]:
ser3[-3:]

b    2
c    3
d    4
dtype: int64

###### Retrieve Data Using Label (Index)

In [42]:
ser5 = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e'])
ser5

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [51]:
#retrieve a single element
print(ser5['a'])

1


In [54]:
#fetching multiple element 
print(ser5[['a','c','d','e']])

a    1
c    3
d    4
e    5
dtype: int64


In [66]:
ser5.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

###### More Querying Series

In [82]:
pd.Series.iloc?

In [71]:
ser5.iloc[1]#Purely integer-location based indexing for selection by position.

2

In [72]:
ser5.iloc[[1,2,3]]

b    2
c    3
d    4
dtype: int64

In [83]:
pd.Series.loc?

In [88]:
ser5.loc["a"]

1

In [89]:
ser5.loc[["a","b"]]

a    1
b    2
dtype: int64

In [93]:
ser6=pd.Series([1,2,3,4,5,6]) #example of summation


In [94]:
np.sum(ser6)

21

In [95]:
ser6.sum()

21

In [96]:
ser6.mean()

3.5

In [102]:
#fetching the first 5 entry 
ser7 = pd.Series([9,8,7,6,5,34,2,98])
ser7.head(7) 
ser7.tail(2)

SyntaxError: invalid syntax (<ipython-input-102-985d1bd7186d>, line 3)

In [104]:
len(ser7) #total elements in the series

8

In [114]:
%%timeit -n 100
summary =0 
for i in ser7:
  summary +=i

15.5 µs ± 842 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [115]:
%%timeit -n 100
summary = ser7.sum()

94.1 µs ± 10 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [116]:
%%timeit -n 100
summary = np.sum(ser7)

117 µs ± 27 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [118]:
ser7+2

0     11
1     10
2      9
3      8
4      7
5     36
6      4
7    100
dtype: int64

In [132]:
for label,val in ser7.iteritems():
    ser7.set_value(label, val+2)
    print(val,label)

15 0
14 1
13 2
12 3
11 4
40 5
8 6
104 7


  


In [None]:
ser7

In [35]:
ser8 = pd.Series(np.random.randint(0,1000,10000))
for label, value in ser8.iteritems():
    ser8.loc[label]= value+2  

In [13]:
s = pd.Series([1, 2, 3])
s.loc['Animal'] = 'Bears'
s

0             1
1             2
2             3
Animal    Bears
dtype: object

In [15]:
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [17]:
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'], 
                                   index=['Cricket',
                                          'Cricket',
                                          'Cricket',
                                          'Cricket'])
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [19]:
all_countries = original_sports.append(cricket_loving_countries)
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [21]:
all_countries["Golf"]

'Scotland'

###### Creation of an empty Dataframe

In [25]:
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


###### Create a DataFrame from Lists

In [29]:
data = [1,2,3,4,5]
df = pd.DataFrame(data)
print(df)

   0
0  1
1  2
2  3
3  4
4  5


In [30]:
data = [['Alex',10],['Bob',12],['Clarke',13]]
df = pd.DataFrame(data,columns=['Name','Age'])
print(df)

     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13


In [32]:
data = [['Alex',10],['Bob',12],['Clarke',13]]
df = pd.DataFrame(data,columns=['Name','Age'],dtype=float)
print(df)

     Name   Age
0    Alex  10.0
1     Bob  12.0
2  Clarke  13.0


###### Create a DataFrame from Dict of ndarrays / Lists

In [39]:
data = {'Name':['Tom', 'Jack', 'Steve', 'Ricky'],'Age':[28,34,29,42]}
df = pd.DataFrame(data)
print(df)

   Age   Name
0   28    Tom
1   34   Jack
2   29  Steve
3   42  Ricky


In [41]:
df1 = pd.DataFrame(data, index=['rank1','rank2','rank3','rank4'])
print(df1)

       Age   Name
rank1   28    Tom
rank2   34   Jack
rank3   29  Steve
rank4   42  Ricky


###### Create a DataFrame from List of Dicts

In [43]:
dict1 = {"a":1,"b":2}
dict2 = {"c":3,"b":4,"a":5}
list_of_dict = [dict1,dict2]
df2 = pd.DataFrame(list_of_dict)
print(df2)

   a  b    c
0  1  2  NaN
1  5  4  3.0


In [44]:
data = [{'a': 1, 'b': 2},{'a': 5, 'b': 10, 'c': 20}]

#With two column indices, values same as dictionary keys
df1 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b'])

#With two column indices with one index with other name
df2 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b1'])
print(df1)
print(df2)

        a   b
first   1   2
second  5  10
        a  b1
first   1 NaN
second  5 NaN


###### Create a DataFrame from Dict of Series

In [48]:
ser_1 = pd.Series([1,2,3,4],index = ["A","B","C","D"])
ser_2 = pd.Series([5,4,6,7],index = ["A","B","C","E"])
dict_ser ={"One":ser_1,"Two":ser_2}
df_ser_dict = pd.DataFrame(dict_ser)
print(df_ser_dict)

   One  Two
A  1.0  5.0
B  2.0  4.0
C  3.0  6.0
D  4.0  NaN
E  NaN  7.0


###### Column Selection

In [49]:
df_ser_dict["One"]

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: One, dtype: float64

In [50]:
type(df_ser_dict)

pandas.core.frame.DataFrame

In [53]:
df_ser_dict["Three"] = pd.Series([10,20,30,40],index = ["A","B","C","D"])
df_ser_dict

Unnamed: 0,One,Two,Three
A,5.0,5.0,10.0
B,4.0,4.0,20.0
C,6.0,6.0,30.0
D,,,40.0
E,7.0,7.0,


In [64]:
df_ser_dict["Four"] =df_ser_dict["One"] +df_ser_dict["Two"]
df_ser_dict

Unnamed: 0,One,Two,Three,Four
A,5.0,5.0,10.0,10.0
B,4.0,4.0,20.0,8.0
C,6.0,6.0,30.0,12.0
D,,,40.0,
E,7.0,7.0,,14.0


###### Column Deletion

In [65]:
del df_ser_dict["Four"]
df_ser_dict

Unnamed: 0,One,Two,Three
A,5.0,5.0,10.0
B,4.0,4.0,20.0
C,6.0,6.0,30.0
D,,,40.0
E,7.0,7.0,


In [61]:
df_ser_dict.pop("Four")
df_ser_dict

Unnamed: 0,One,Two,Three
A,5.0,5.0,10.0
B,4.0,4.0,20.0
C,6.0,6.0,30.0
D,,,40.0
E,7.0,7.0,


###### Row Selection, Addition, and Deletion

In [67]:
d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']), 
     'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}

df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [71]:
df.loc["a"] #using labels 

one    1.0
two    1.0
Name: a, dtype: float64

In [73]:
df.iloc[0] #using internal integer indexes 

one    1.0
two    1.0
Name: a, dtype: float64

###### Slice Rows

In [80]:
df[1:3]

Unnamed: 0,one,two
b,2.0,2
c,3.0,3


###### Addition of Rows

In [81]:
df2 = pd.DataFrame([[2,3],[4,5]],index=['a','c'])
df = df2.append(df2)
print(df)

   0  1
a  2  3
c  4  5
a  2  3
c  4  5


###### Deletion of Rows

In [84]:
# Drop rows with label 0
df = df.drop("a")
df

Unnamed: 0,0,1
c,4,5
c,4,5


In [83]:
df

Unnamed: 0,0,1
a,2,3
c,4,5
a,2,3
c,4,5
