In [17]:
import numpy as np
import pandas as pd

# Demonstration of Panda Series
data = pd.Series([0.25, 0.5, 0.75, 1.00, 1.25], index=['a', 'n', 'a', 'n', 'd'])
print(data['a'])
print(data['n'])

dobs = {'anand':19051965, 'neeraja':13041971, 'nikhil vikas':12081996, 'abhigjna deepthi':22021999}
dob_series = pd.Series(dobs)
print(dob_series)

dob_series['neeraja':'abhigjna deepthi']

temps = pd.Series([25, 32, 23, 42, 28, 34, 29, 40, 28, 29, 32, 23], index=['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'])
print(temps)

a    0.25
a    0.75
dtype: float64
n    0.5
n    1.0
dtype: float64
anand               19051965
neeraja             13041971
nikhil vikas        12081996
abhigjna deepthi    22021999
dtype: int64
jan    25
feb    32
mar    23
apr    42
may    28
jun    34
jul    29
aug    40
sep    28
oct    29
nov    32
dec    23
dtype: int64


In [25]:
# Demonstration of Panda Dataframe
population = {'California': 38332521, 'Florida':19552860, 'Illinois':12882135, 'New York':19651127, 'Texas':26448193 }
area = {'California': 423967, 'Florida':170312, 'Illinois':149995, 'New York':141297, 'Texas':695662 }
states_df = pd.DataFrame({'population': population, 'area': area})
print("states_df:\n", states_df, "\n")
print("states.index:\n", states_df.index, "\n")
print("states.values:\n", states_df.values, "\n")
print("states.columns: n", states_df.columns, "\n")
print("states['area']:\n", states_df['area'], "\n")
print("states['population']:\n", states_df['population'], "\n")

# Creation of a Panda Dataframe in several ways
# From a single series object
df1 = pd.DataFrame(population, columns=['population'])
print("Dataframe from a single series object:\n", df1, "\n")

# From a list of dicts
df2 = [{'a':i, 'b':i * 2} for i in range(5)]
print("Dataframe from a list of dicts:\n", pd.DataFrame(df2), "\n")

# Even if some keys are missing pandas fills them with NaN
df2a = pd.DataFrame([{'a': 1, 'b': 2}, {'b': 3, 'c': 4}])
print("Dataframe with missing keys:\n", df2a, "\n")

# From a list of series objects
df3 = pd.DataFrame({'population': population, 'area': area})
print("Dataframe from a list of series objects:\n", df3, "\n")

# From a two-dim numpy array
df4 = pd.DataFrame(np.random.rand(3, 2),
                  columns = ['foo', 'bar'],
                  index = ['a', 'b',  'c'])
print("Dataframe from two dimensional numpy array\n", df4, "\n")

# From a numpy structured array
np_array = np.zeros(3, dtype=[('A', 'i8'), ('B', 'f8')])
df5 =  pd.DataFrame(np_array)
print("Dataframe from a numpy structured array\n", df5, "\n")



states_df:
             population    area
California    38332521  423967
Florida       19552860  170312
Illinois      12882135  149995
New York      19651127  141297
Texas         26448193  695662 

states.index:
 Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object') 

states.values:
 [[38332521   423967]
 [19552860   170312]
 [12882135   149995]
 [19651127   141297]
 [26448193   695662]] 

states.columns: n Index(['population', 'area'], dtype='object') 

states['area']:
 California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64 

states['population']:
 California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
Name: population, dtype: int64 

Dataframe from a single series object:
 Empty DataFrame
Columns: [population]
Index: [] 

Dataframe from a list of dicts:
    a  b
0  0  0
1  1  2
2  2  4
3  3  6
4  4  8 

Dataframe with mis

In [37]:
population = {'California': 38332521, 'Florida':19552860, 'Illinois':12882135, 'New York':19651127, 'Texas':26448193 }
area = {'California': 423967, 'Florida':170312, 'Illinois':149995, 'New York':141297, 'Texas':695662 }
states_df = pd.DataFrame({'population': population, 'area': area})
states_df['density'] = states_df.population / states_df.area
print("states_df:\n")
print(states_df)
print("states_df transpose:\n")
print(states_df.T)
print("Partial data using indexes:\n")
print(states_df.iloc[:3, :2])
print("Parial data using literals:\n")
states_df.loc['Illinois', 'population']

states_df:

            population    area     density
California    38332521  423967   90.413926
Florida       19552860  170312  114.806121
Illinois      12882135  149995   85.883763
New York      19651127  141297  139.076746
Texas         26448193  695662   38.018740
states_df transpose:

              California       Florida      Illinois      New York  \
population  3.833252e+07  1.955286e+07  1.288214e+07  1.965113e+07   
area        4.239670e+05  1.703120e+05  1.499950e+05  1.412970e+05   
density     9.041393e+01  1.148061e+02  8.588376e+01  1.390767e+02   

                   Texas  
population  2.644819e+07  
area        6.956620e+05  
density     3.801874e+01  
Partial data using indexes:

            population    area
California    38332521  423967
Florida       19552860  170312
Illinois      12882135  149995
Parial data using literals:



12882135

In [40]:
rng = np.random.RandomState(42)
ser = pd.Series(rng.randint(0, 10, 4))
print(ser)
df = pd.DataFrame(rng.randint(0, 10, (3, 4)),
                 columns=['A', 'B', 'C', 'D'])
print(df)
df_sin = np.sin(df * np.pi / 4)
print(df_sin)


0    6
1    3
2    7
3    4
dtype: int32
   A  B  C  D
0  6  9  2  6
1  7  4  3  7
2  7  2  5  4


Unnamed: 0,A,B,C,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16
