# Series 
- One-dimensional (1-D) labeled array that can hold any data type
- We can use the default integer index or supply our own index labels (unlike Python lists)

In [1]:
import pandas as pd

In [4]:
# Build a Series from a plain list; with no index given, pandas labels the rows 0..n-1
s = pd.Series(list(range(1, 8)))
s

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64

In [5]:
# Same values as before, but each one is labelled with a day of the week
day_labels = ['Mon', 'Tue', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
s = pd.Series(list(range(1, len(day_labels) + 1)), index=day_labels)
s

Mon      1
Tue      2
Wed      3
Thurs    4
Fri      5
Sat      6
Sun      7
dtype: int64

In [6]:
# Positional lookup goes through .iloc: on a Series with a non-integer index,
# plain s[0] is deprecated (pandas 2.1+) because an integer key is ambiguous
# between "position 0" and "label 0". Label lookup is still s['Mon'].
s.iloc[0] # or s['Mon']

1

In [7]:
# Overwrite the value stored under an existing label, then show the updated Series
s.loc['Sun'] = 99
s

Mon       1
Tue       2
Wed       3
Thurs     4
Fri       5
Sat       6
Sun      99
dtype: int64

In [8]:
# Arithmetic mean of all values in the Series
s.mean()

17.142857142857142

In [9]:
# Element-wise multiplication: every value is tripled, the index labels are kept
s.mul(3)

Mon        3
Tue        6
Wed        9
Thurs     12
Fri       15
Sat       18
Sun      297
dtype: int64

In [10]:
# Element-wise addition of two Series; values are matched up by index label
s.add(s)

Mon        2
Tue        4
Wed        6
Thurs      8
Fri       10
Sat       12
Sun      198
dtype: int64

# DataFrame
- Two-dimensional (2-D) labeled data structure with rows and columns

In [11]:
# Build a DataFrame from a dictionary of Series:
# each key becomes a column name and the Series' index labels the rows
series_by_column = {'first': s, 'second': s * 2}
df = pd.DataFrame(series_by_column)
df

Unnamed: 0,first,second
Mon,1,2
Tue,2,4
Wed,3,6
Thurs,4,8
Fri,5,10
Sat,6,12
Sun,99,198


In [12]:
# Build a DataFrame from a list of lists (one inner list per row)
index_country = ['Japan', 'India', 'China', 'Brazil', 'Mexico']
column_names = ['city', 'population']

# Each row is [city, population]; row order matches index_country
data = [
    ['Tokyo', 37.4],
    ['Delhi', 28.5],
    ['Shanghai', 25.6],
    ['Sao Paulo', 21.7],
    ['Mexico City', 21.6],
]
# Individual rows stay addressable by name
row1, row2, row3, row4, row5 = data

df = pd.DataFrame(data=data, index=index_country, columns=column_names)

df

Unnamed: 0,city,population
Japan,Tokyo,37.4
India,Delhi,28.5
China,Shanghai,25.6
Brazil,Sao Paulo,21.7
Mexico,Mexico City,21.6


In [13]:
# Build a DataFrame from a dictionary of lists
index_country = ['Japan', 'India', 'China', 'Brazil', 'Mexico']
cities = ['Tokyo', 'Delhi', 'Shanghai', 'Sao Paulo', 'Mexico City']
population = [37.4, 28.5, 25.6, 21.7, 21.6]

# Each key becomes a column name; each list supplies that column's values
dict_population = dict(city=cities, population=population)

df = pd.DataFrame(data=dict_population, index=index_country)

df

Unnamed: 0,city,population
Japan,Tokyo,37.4
India,Delhi,28.5
China,Shanghai,25.6
Brazil,Sao Paulo,21.7
Mexico,Mexico City,21.6


In [14]:
# Select a single column by label; the result is a Series named after the column
df.loc[:, 'city']

Japan           Tokyo
India           Delhi
China        Shanghai
Brazil      Sao Paulo
Mexico    Mexico City
Name: city, dtype: object

# Loading data from external sources
Use `pd.read_csv`, `pd.read_excel` or `pd.read_sql` to load external data into a DataFrame.

In [2]:
# Input files, given as paths relative to the notebook's working directory
billionaire_csv = 'Billionaire.csv'
fertility_xlsx = 'worldFertility.xlsx'

billionaire = pd.read_csv(billionaire_csv)

# NOTE(review): the info() output for the pickled copy later in this notebook lists
# every column as "Unnamed: N" with dtype object, so the sheet's real header row is
# probably not the first row. Consider passing header=/skiprows= here once the
# sheet layout has been confirmed.
fertility = pd.read_excel(fertility_xlsx, sheet_name='FERTILITY INDICATORS')

In [3]:
# Quick ways to inspect a loaded DataFrame (uncomment one line to try it)

# display the frame itself
# billionaire

# tail - gives the last rows
# billionaire.tail(10)

# head - gives the first rows
# billionaire.head(10)

# shape - gives (rows, columns)
# billionaire.shape

# Getting column names and the index (identifies each row; by default 0 to n-1)
# billionaire.columns
# billionaire.index

billionaire.info() # overview of the data: columns, non-null counts, dtypes, memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2755 entries, 0 to 2754
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      2755 non-null   object 
 1   NetWorth  2755 non-null   object 
 2   Country   2755 non-null   object 
 3   Source    2755 non-null   object 
 4   Rank      2755 non-null   int64  
 5   Age       2676 non-null   float64
 6   Industry  2755 non-null   object 
dtypes: float64(1), int64(1), object(5)
memory usage: 150.8+ KB


# Data Types

- int8, int16, int32, int64
- float32, float64
- object
- bool
- datetime64
- string
- category

## dtypes Conversion
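- `astype()` - cast a Series or DataFrame column to a given dtype
- `to_numeric()` / `to_datetime()` - parse values into numeric or datetime dtypes
- `read_*(dtype=...)` - set the dtypes while the data is being loaded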

# Exporting Data


In [5]:
# CSV is plain text, so changes made to the fertility dataframe (such as dtype
# conversions) do not persist when the file is read back.
# index=False leaves the row index out of the file.
fertility.to_csv('Fertility.csv',index=False)

In [7]:
# Pickle stores the dataframe object itself, so changes made to the fertility
# dataframe will persist when it is loaded again.
# Security: never load pickle files from untrusted sources - unpickling can
# execute arbitrary code.
fertility.to_pickle('Fertility.pkl') 

In [8]:
# Load the dataframe back from the pickle file written by this notebook
# (safe here because the file is our own; do not do this with untrusted files)
fertility_pkl = pd.read_pickle('Fertility.pkl')

In [9]:
# Summarise the reloaded frame: row count, per-column non-null counts, dtypes, memory usage
fertility_pkl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79780 entries, 0 to 79779
Data columns (total 17 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Unnamed: 0   79779 non-null  object
 1   Unnamed: 1   79775 non-null  object
 2   Unnamed: 2   79775 non-null  object
 3   Unnamed: 3   79775 non-null  object
 4   Unnamed: 4   79775 non-null  object
 5   Unnamed: 5   79775 non-null  object
 6   Unnamed: 6   79775 non-null  object
 7   Unnamed: 7   79775 non-null  object
 8   Unnamed: 8   79775 non-null  object
 9   Unnamed: 9   79775 non-null  object
 10  Unnamed: 10  79775 non-null  object
 11  Unnamed: 11  79775 non-null  object
 12  Unnamed: 12  79775 non-null  object
 13  Unnamed: 13  79775 non-null  object
 14  Unnamed: 14  79775 non-null  object
 15  Unnamed: 15  79775 non-null  object
 16  Unnamed: 16  79775 non-null  object
dtypes: object(17)
memory usage: 10.3+ MB
