# Features of Pandas
* Fast and efficient DataFrame object with default and customized indexing.
* Tools for loading data into in-memory data objects from different file formats.
* Data alignment and integrated handling of missing data.
* Reshaping and pivoting of data sets.
* Label-based slicing, indexing and subsetting of large data sets.
* Columns from a data structure can be deleted or inserted.
* Group by data for aggregation and transformations.
* High performance merging and joining of data.

# Data Structures in Pandas
* Series
* DataFrame
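* Panel (deprecated in pandas 0.20 and removed in 0.25; no longer available in current releases)

DataFrame is the most widely used of these and one of the most important data structures.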

# Series
pandas.Series( data, index, dtype, copy)

In [5]:
import pandas as pd
import numpy as np

# A Series built from a NumPy array gets the default integer index.
data = np.array(['a', 'b', 'c', 'd'])
print(data)
s1 = pd.Series(data=data)
print(s1)

# A plain Python list works as the data source as well.
s2 = pd.Series(data=[1, 2, 3, 4])
print(s2)

# With a dict, the keys become the index labels and the values the data.
d = dict(John=25, Jibin=30, Alan=22)
s3 = pd.Series(data=d)
print(s3)

['a' 'b' 'c' 'd']
0    a
1    b
2    c
3    d
dtype: object
0    1
1    2
2    3
3    4
dtype: int64
John     25
Jibin    30
Alan     22
dtype: int64


In [5]:
import pandas as pd
import numpy as np

data = np.array(['a', 'b', 'c', 'd'])
# Custom labels replace the default integer index. Keeping the labels unique
# means a label lookup such as s['x'] yields a single value.
s = pd.Series(data, index=['x', 'y', 'z', 'p'])

# Positional access goes through .iloc. A bare integer key (s[0]) on a
# label-indexed Series relies on a deprecated positional fallback of
# Series.__getitem__, so it is avoided here.
print(s.iloc[0])
print(s['x'])                # access by index label
print(s.iloc[:3])            # positional slice: first three elements
print(s[['x', 'y', 'z']])    # select several labels at once


a
a
x    a
y    b
z    c
dtype: object
x    a
y    b
z    c
dtype: object


# DataFrame
pandas.DataFrame( data, index, columns, dtype, copy)

In [12]:
# Creation
import pandas as pd

# From a flat list: a single column with default row and column labels.
data = [1, 2, 3, 4, 5]
df = pd.DataFrame(data=data)
print(df)


# From a list of lists: each inner list is one row; `columns` names the fields.
# NOTE(review): the 'Bob' and 'Clarke' rows carry no 'Sal' entry.
data = [
    ['Alex', 10, 300],
    ['Bob', 12],
    ['Clarke', 13],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age', 'Sal'])
print(df)                    # a single column is available as df['Name']

# From a dict of lists: keys become column names, `index` labels the rows.
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
    'Age': [28, 34, 29, 42],
}
df = pd.DataFrame(data=data, index=['rank1', 'rank2', 'rank3', 'rank4'])
print(df)

   0
0  1
1  2
2  3
3  4
4  5
0      Alex
1       Bob
2    Clarke
Name: Name, dtype: object
        Name  Age
rank1    Tom   28
rank2   Jack   34
rank3  Steve   29
rank4  Ricky   42


In [18]:
# Accessing, adding and removing columns
import pandas as pd

# 'one' has no entry for label 'd'; the resulting frame holds NaN there.
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}

df = pd.DataFrame(data=d)
print(df['one'])

print("\nAdding a new column by passing as Series:")
df['three'] = pd.Series([10, 20, 30], index=list('abc'))
print(df)

print("\nAdding a new column using the existing columns in DataFrame:")
df['four'] = df['one'].add(df['three'])
print(df)

# Remove a column in place with the `del` statement.
print("\nDeleting the first column using DEL function:")
del df['one']
print(df)

# Remove a column in place with pop(); the returned column is discarded here.
print("\nDeleting another column using POP function:")
df.pop('two')
print(df)

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

Adding a new column by passing as Series:
   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN

Adding a new column using the existing columns in DataFrame:
   one  two  three  four
a  1.0    1   10.0  11.0
b  2.0    2   20.0  22.0
c  3.0    3   30.0  33.0
d  NaN    4    NaN   NaN

Deleting the first column using DEL function:
   two  three  four
a    1   10.0  11.0
b    2   20.0  22.0
c    3   30.0  33.0
d    4    NaN   NaN

Deleting another column using POP function:
   three  four
a   10.0  11.0
b   20.0  22.0
c   30.0  33.0
d    NaN   NaN


In [19]:
# Accessing rows
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}

df = pd.DataFrame(data=d)

# .loc selects a row by its index label.
print(df.loc['b'])

# .iloc selects a row by its integer position.
print(df.iloc[2])

# Positional row slice: rows at positions 2 and 3.
print(df.iloc[2:4])

one    2.0
two    2.0
Name: b, dtype: float64
one    3.0
two    3.0
Name: c, dtype: float64
   one  two
c  3.0    3
d  NaN    4


# Functionalities

In [21]:
import pandas as pd

# One inner list per row.
data = [
    ['Alex', 10],
    ['Bob', 12],
    ['Clarke', 13],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age'])
print(df)
print()
# Transpose: rows become columns and columns become rows.
print(df.transpose())

     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13

         0    1       2
Name  Alex  Bob  Clarke
Age     10   12      13


In [22]:
# Show the data type of every column.
print(df.dtypes)

Name    object
Age      int64
dtype: object


In [23]:
# Number of dimensions (axes) of the DataFrame.
df.ndim

2

In [9]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

(3, 2)

In [10]:
# Total number of elements in the DataFrame (rows * columns).
df.size

6

In [35]:
import pandas as pd
import numpy as np

# Raw column values, one list per column.
names = [
    'Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith',
    'Jack', 'Lee', 'David', 'Gasper', 'Betina', 'Andres',
]
ages = [25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46]
ratings = [4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8, 3.78, 2.98, 4.80, 4.10, 3.65]

# Create a dictionary of Series, one per column.
d = {
    'Name': pd.Series(names),
    'Age': pd.Series(ages),
    'Rating': pd.Series(ratings),
}

# Create a DataFrame
df = pd.DataFrame(data=d)
print(df)
print()
# Summing a single column; for the string column the values are concatenated.
print(df['Name'].sum())
# Column-wise sum over the whole frame (same as print(df[:].sum())).
print(df.sum())
# df.mean  -- not run in this cell

      Name  Age  Rating
0      Tom   25    4.23
1    James   26    3.24
2    Ricky   25    3.98
3      Vin   23    2.56
4    Steve   30    3.20
5    Smith   29    4.60
6     Jack   23    3.80
7      Lee   34    3.78
8    David   40    2.98
9   Gasper   30    4.80
10  Betina   51    4.10
11  Andres   46    3.65

TomJamesRickyVinSteveSmithJackLeeDavidGasperBetinaAndres
Name      TomJamesRickyVinSteveSmithJackLeeDavidGasperBe...
Age                                                     382
Rating                                                44.92
dtype: object
Name      TomJamesRickyVinSteveSmithJackLeeDavidGasperBe...
Age                                                     382
Rating                                                44.92
dtype: object
