# **Introduction to Pandas**

### Installing Pandas

In [1]:
#!pip install pandas
!pip install openpyxl




[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Importing Pandas 

In [2]:
import pandas as pd
import numpy as np

### Importing Data

In [3]:
# Load the Nobel laureates CSV; the separator is spelled out although ',' is the default
df = pd.read_csv('datasets/nobel.csv', sep=',')

In [4]:
# Load the Titanic workbook. sheet_name=0 (the default) reads only the first
# sheet; pass a sheet name or index to read a different one.
df_titanic = pd.read_excel('datasets/Titanic.xlsx', sheet_name=0)

In [5]:
# Load the tab-separated file with read_csv by passing a tab as the separator
df_disaster = pd.read_csv('datasets/Disaster_Vulnerability_Map.tsv', sep='\t')

### Seeing Sample Data

In [6]:
# Preview the first five rows of the Nobel dataset
df.head(n=5)

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
0,1901,Chemistry,The Nobel Prize in Chemistry 1901,"""in recognition of the extraordinary services ...",1/1,160,Individual,Jacobus Henricus van 't Hoff,1852-08-30,Rotterdam,Netherlands,Male,Berlin University,Berlin,Germany,1911-03-01,Berlin,Germany
1,1901,Literature,The Nobel Prize in Literature 1901,"""in special recognition of his poetic composit...",1/1,569,Individual,Sully Prudhomme,1839-03-16,Paris,France,Male,,,,1907-09-07,Châtenay,France
2,1901,Medicine,The Nobel Prize in Physiology or Medicine 1901,"""for his work on serum therapy, especially its...",1/1,293,Individual,Emil Adolf von Behring,1854-03-15,Hansdorf (Lawice),Prussia (Poland),Male,Marburg University,Marburg,Germany,1917-03-31,Marburg,Germany
3,1901,Peace,The Nobel Peace Prize 1901,,1/2,462,Individual,Jean Henry Dunant,1828-05-08,Geneva,Switzerland,Male,,,,1910-10-30,Heiden,Switzerland
4,1901,Peace,The Nobel Peace Prize 1901,,1/2,463,Individual,Frédéric Passy,1822-05-20,Paris,France,Male,,,,1912-06-12,Paris,France


In [7]:
# The argument to head() is the maximum number of rows to return
df_disaster.head(n=10)

Unnamed: 0,FID,AFFGEOID,TRACTCE,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,-1,1400000US36065023400,23400,36,New York,NY,36065,Oneida,36065023400,"Census Tract 234, Oneida County, New York",...,0,0,1,1,2,154,125,3.6,2.9,10017
1,-1,1400000US36065023501,23501,36,New York,NY,36065,Oneida,36065023501,"Census Tract 235.01, Oneida County, New York",...,0,0,0,0,1,151,100,6.3,4.2,912
2,-1,1400000US36065023502,23502,36,New York,NY,36065,Oneida,36065023502,"Census Tract 235.02, Oneida County, New York",...,0,0,0,0,0,131,85,2.9,1.8,3057
3,-1,1400000US36065023702,23702,36,New York,NY,36065,Oneida,36065023702,"Census Tract 237.02, Oneida County, New York",...,0,0,0,0,0,385,136,10.2,3.6,1407
4,-1,1400000US36065023901,23901,36,New York,NY,36065,Oneida,36065023901,"Census Tract 239.01, Oneida County, New York",...,0,0,0,0,0,165,97,7.2,4.3,1646
5,-1,1400000US36065023902,23902,36,New York,NY,36065,Oneida,36065023902,"Census Tract 239.02, Oneida County, New York",...,0,0,0,0,0,155,77,7.8,3.8,873
6,-1,1400000US36065024000,24000,36,New York,NY,36065,Oneida,36065024000,"Census Tract 240, Oneida County, New York",...,0,0,0,0,0,169,85,3.8,1.9,4696
7,-1,1400000US36065024101,24101,36,New York,NY,36065,Oneida,36065024101,"Census Tract 241.01, Oneida County, New York",...,0,0,0,0,2,93,38,6.3,2.5,921


In [8]:
# Preview the last five rows; like head(), tail() accepts a row count
df.tail(n=5)

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
995,2023,Chemistry,The Nobel Prize in Chemistry 2023,"""for the discovery and synthesis of quantum dots""",1/3,1030,Individual,Louis Brus,1943-00-00,"Cleveland, OH",United States of America,Male,Columbia University,"New York, NY",United States of America,,,
996,2023,Chemistry,The Nobel Prize in Chemistry 2023,"""for the discovery and synthesis of quantum dots""",1/3,1031,Individual,Aleksey Yekimov,1945-00-00,,USSR (now Russia),Male,Nanocrystals Technology Inc.,"New York, NY",United States of America,,,
997,2023,Literature,The Nobel Prize in Literature 2023,"""for his innovative plays and prose which give...",1/1,1032,Individual,Jon Fosse,1959-09-29,Haugesund,Norway,Male,,,,,,
998,2023,Peace,The Nobel Peace Prize 2023,"""for her fight against the oppression of women...",1/1,1033,Individual,Narges Mohammadi,1972-04-21,Zanjan,Iran,Female,,,,,,
999,2023,Economics,The Sveriges Riksbank Prize in Economic Scienc...,"""for having advanced our understanding of wome...",1/1,1034,Individual,Claudia Goldin,1946-00-00,"New York, NY",United States of America,Female,Harvard University,"Cambridge, MA",United States of America,,,


### Checking our dataset

In [9]:
# shape is an attribute (no parentheses) holding the size of the DataFrame
df.shape # (rows, columns)

(1000, 18)

In [10]:
# The first element of shape is the number of rows
n_rows = df.shape[0]
print('Number of Rows: ', n_rows)

Number of Rows:  1000


In [11]:
# The second element of shape is the number of columns
n_cols = df.shape[1]
print('Number of Columns: ', n_cols)

Number of Columns:  18


In [12]:
# Column labels of the DataFrame, returned as a pandas Index
df.columns

Index(['year', 'category', 'prize', 'motivation', 'prize_share', 'laureate_id',
       'laureate_type', 'full_name', 'birth_date', 'birth_city',
       'birth_country', 'sex', 'organization_name', 'organization_city',
       'organization_country', 'death_date', 'death_city', 'death_country'],
      dtype='object')

In [13]:
# Iterating over df.columns yields the column names one at a time
for column_name in df.columns:
    print(column_name)

year
category
prize
motivation
prize_share
laureate_id
laureate_type
full_name
birth_date
birth_city
birth_country
sex
organization_name
organization_city
organization_country
death_date
death_city
death_country


In [14]:
# (rows, columns) of the disaster-vulnerability table
df_disaster.shape

(8, 127)

In [15]:
# Data structure that pandas uses to hold a table
type(df)

pandas.core.frame.DataFrame

In [16]:
# Per-column summary: non-null count and dtype of every column
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   year                  1000 non-null   int64 
 1   category              1000 non-null   object
 2   prize                 1000 non-null   object
 3   motivation            912 non-null    object
 4   prize_share           1000 non-null   object
 5   laureate_id           1000 non-null   int64 
 6   laureate_type         1000 non-null   object
 7   full_name             1000 non-null   object
 8   birth_date            968 non-null    object
 9   birth_city            964 non-null    object
 10  birth_country         969 non-null    object
 11  sex                   970 non-null    object
 12  organization_name     736 non-null    object
 13  organization_city     735 non-null    object
 14  organization_country  735 non-null    object
 15  death_date            596 non-null    o

In [17]:
# Summary statistics; by default only the numeric columns are described
df.describe()

Unnamed: 0,year,laureate_id
count,1000.0,1000.0
mean,1973.721,509.099
std,34.523195,298.130617
min,1901.0,1.0
25%,1949.75,250.75
50%,1979.0,500.5
75%,2003.0,764.25
max,2023.0,1034.0


In [18]:
# Summary statistics for every column, numeric and non-numeric alike
df.describe(include='all')

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
count,1000.0,1000,1000,912,1000,1000.0,1000,1000,968,964,969,970,736,735,735,596,579,585
unique,,6,621,615,4,,2,993,949,649,129,2,325,195,29,585,293,50
top,,Medicine,The Nobel Prize in Chemistry 1972,"""for their studies of extremely fast chemical ...",1/1,,Individual,Comité international de la Croix Rouge (Intern...,1932-10-24,"New York, NY",United States of America,Male,University of California,"Cambridge, MA",United States of America,2013-11-19,Paris,United States of America
freq,,227,3,3,358,,966,3,2,55,291,905,36,53,385,2,27,206
mean,1973.721,,,,,509.099,,,,,,,,,,,,
std,34.523195,,,,,298.130617,,,,,,,,,,,,
min,1901.0,,,,,1.0,,,,,,,,,,,,
25%,1949.75,,,,,250.75,,,,,,,,,,,,
50%,1979.0,,,,,500.5,,,,,,,,,,,,
75%,2003.0,,,,,764.25,,,,,,,,,,,,


In [19]:
# Summary statistics for the object-dtype (text/categorical) columns only
df.describe(include='object')

Unnamed: 0,category,prize,motivation,prize_share,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
count,1000,1000,912,1000,1000,1000,968,964,969,970,736,735,735,596,579,585
unique,6,621,615,4,2,993,949,649,129,2,325,195,29,585,293,50
top,Medicine,The Nobel Prize in Chemistry 1972,"""for their studies of extremely fast chemical ...",1/1,Individual,Comité international de la Croix Rouge (Intern...,1932-10-24,"New York, NY",United States of America,Male,University of California,"Cambridge, MA",United States of America,2013-11-19,Paris,United States of America
freq,227,3,3,358,966,3,2,55,291,905,36,53,385,2,27,206


### Filtering Data

##### Column Wise Filtering

In [20]:
df['year'] # select a single column by name; the result is a Series


0      1901
1      1901
2      1901
3      1901
4      1901
       ... 
995    2023
996    2023
997    2023
998    2023
999    2023
Name: year, Length: 1000, dtype: int64

In [21]:
# Attribute-style access: an alternative way to select the same column
df.year

0      1901
1      1901
2      1901
3      1901
4      1901
       ... 
995    2023
996    2023
997    2023
998    2023
999    2023
Name: year, Length: 1000, dtype: int64

In [22]:
df[['year','category']] # pass a list of names to select several columns

Unnamed: 0,year,category
0,1901,Chemistry
1,1901,Literature
2,1901,Medicine
3,1901,Peace
4,1901,Peace
...,...,...
995,2023,Chemistry
996,2023,Chemistry
997,2023,Literature
998,2023,Peace


##### Row wise Filtering

In [23]:
# Boolean mask: True for every row whose sex is 'Male'
is_male = df['sex'] == 'Male'
# Indexing with the mask keeps only the rows where it is True
df[is_male]

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
0,1901,Chemistry,The Nobel Prize in Chemistry 1901,"""in recognition of the extraordinary services ...",1/1,160,Individual,Jacobus Henricus van 't Hoff,1852-08-30,Rotterdam,Netherlands,Male,Berlin University,Berlin,Germany,1911-03-01,Berlin,Germany
1,1901,Literature,The Nobel Prize in Literature 1901,"""in special recognition of his poetic composit...",1/1,569,Individual,Sully Prudhomme,1839-03-16,Paris,France,Male,,,,1907-09-07,Châtenay,France
2,1901,Medicine,The Nobel Prize in Physiology or Medicine 1901,"""for his work on serum therapy, especially its...",1/1,293,Individual,Emil Adolf von Behring,1854-03-15,Hansdorf (Lawice),Prussia (Poland),Male,Marburg University,Marburg,Germany,1917-03-31,Marburg,Germany
3,1901,Peace,The Nobel Peace Prize 1901,,1/2,462,Individual,Jean Henry Dunant,1828-05-08,Geneva,Switzerland,Male,,,,1910-10-30,Heiden,Switzerland
4,1901,Peace,The Nobel Peace Prize 1901,,1/2,463,Individual,Frédéric Passy,1822-05-20,Paris,France,Male,,,,1912-06-12,Paris,France
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
992,2023,Physics,The Nobel Prize in Physics 2023,"""for experimental methods that generate attose...",1/3,1027,Individual,Ferenc Krausz,1962-05-17,Mór,Hungary,Male,Max Planck Institute of Quantum Optics,Garching,Germany,,,
994,2023,Chemistry,The Nobel Prize in Chemistry 2023,"""for the discovery and synthesis of quantum dots""",1/3,1029,Individual,Moungi Bawendi,1961-00-00,Paris,France,Male,Massachusetts Institute of Technology (MIT),"Cambridge, MA",United States of America,,,
995,2023,Chemistry,The Nobel Prize in Chemistry 2023,"""for the discovery and synthesis of quantum dots""",1/3,1030,Individual,Louis Brus,1943-00-00,"Cleveland, OH",United States of America,Male,Columbia University,"New York, NY",United States of America,,,
996,2023,Chemistry,The Nobel Prize in Chemistry 2023,"""for the discovery and synthesis of quantum dots""",1/3,1031,Individual,Aleksey Yekimov,1945-00-00,,USSR (now Russia),Male,Nanocrystals Technology Inc.,"New York, NY",United States of America,,,


In [24]:
# First five rows in which sex is 'Female'
df[df['sex']=='Female'].head()

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
19,1903,Physics,The Nobel Prize in Physics 1903,"""in recognition of the extraordinary services ...",1/4,6,Individual,"Marie Curie, née Sklodowska",1867-11-07,Warsaw,Russian Empire (Poland),Female,,,,1934-07-04,Sallanches,France
29,1905,Peace,The Nobel Peace Prize 1905,,1/1,468,Individual,"Baroness Bertha Sophie Felicita von Suttner, n...",1843-06-09,Prague,Austrian Empire (Czech Republic),Female,,,,1914-06-21,Vienna,Austria
51,1909,Literature,The Nobel Prize in Literature 1909,"""in appreciation of the lofty idealism, vivid ...",1/1,579,Individual,Selma Ottilia Lovisa Lagerlöf,1858-11-20,Mårbacka,Sweden,Female,,,,1940-03-16,Mårbacka,Sweden
62,1911,Chemistry,The Nobel Prize in Chemistry 1911,"""in recognition of her services to the advance...",1/1,6,Individual,"Marie Curie, née Sklodowska",1867-11-07,Warsaw,Russian Empire (Poland),Female,Sorbonne University,Paris,France,1934-07-04,Sallanches,France
128,1926,Literature,The Nobel Prize in Literature 1926,"""for her idealistically inspired writings whic...",1/1,597,Individual,Grazia Deledda,1871-09-27,"Nuoro, Sardinia",Italy,Female,,,,1936-08-15,Rome,Italy


In [25]:
# What a condition evaluates to in pandas: a boolean Series with one value per row
df['sex']=='Male'

0       True
1       True
2       True
3       True
4       True
       ...  
995     True
996     True
997     True
998    False
999    False
Name: sex, Length: 1000, dtype: bool

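Syntax for multiple conditions (wrap each condition in parentheses, because `&` and `|` bind more tightly than comparison operators):
- AND: `df[(condition1) & (condition2) & (condition3)]`
- OR: `df[(condition1) | (condition2) | (condition3)]`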

In [26]:
# Nobel prize winners from 1920 to 1950, both years included
df[(df['year']>=1920) & (df['year']<=1950)]

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
97,1920,Chemistry,The Nobel Prize in Chemistry 1920,"""in recognition of his work in thermochemistry""",1/1,178,Individual,Walther Hermann Nernst,1864-06-25,Briesen,Prussia (Germany),Male,Berlin University,Berlin,Germany,1941-11-18,Muskau,Germany
98,1920,Literature,The Nobel Prize in Literature 1920,"""for his monumental work, <I>Growth of the Soi...",1/1,589,Individual,Knut Pedersen Hamsun,1859-08-04,Lom,Norway,Male,,,,1952-02-19,Grimstad,Norway
99,1920,Medicine,The Nobel Prize in Physiology or Medicine 1920,"""for his discovery of the capillary motor regu...",1/1,310,Individual,Schack August Steenberg Krogh,1874-11-15,Grenå,Denmark,Male,Copenhagen University,Copenhagen,Denmark,1949-09-13,Copenhagen,Denmark
100,1920,Peace,The Nobel Peace Prize 1920,,1/1,484,Individual,Léon Victor Auguste Bourgeois,1851-05-21,Paris,France,Male,,,,1925-09-29,Épernay,France
101,1920,Physics,The Nobel Prize in Physics 1920,"""in recognition of the service he has rendered...",1/1,25,Individual,Charles Edouard Guillaume,1861-02-15,Fleurier,Switzerland,Male,Bureau International des Poids et Mesures (Int...,Sèvres,France,1938-06-13,Sèvres,France
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253,1950,Medicine,The Nobel Prize in Physiology or Medicine 1950,"""for their discoveries relating to the hormone...",1/3,349,Individual,Edward Calvin Kendall,1886-03-08,"South Norwalk, CT",United States of America,Male,Mayo Clinic,"Rochester, MN",United States of America,1972-05-04,"Princeton, NJ",United States of America
254,1950,Medicine,The Nobel Prize in Physiology or Medicine 1950,"""for their discoveries relating to the hormone...",1/3,350,Individual,Tadeus Reichstein,1897-07-20,Wloclawek,Poland,Male,Basel University,Basel,Switzerland,1996-08-01,Basel,Switzerland
255,1950,Medicine,The Nobel Prize in Physiology or Medicine 1950,"""for their discoveries relating to the hormone...",1/3,351,Individual,Philip Showalter Hench,1896-02-28,"Pittsburgh, PA",United States of America,Male,Mayo Clinic,"Rochester, MN",United States of America,1965-03-30,Ocho Rios,Jamaica
256,1950,Peace,The Nobel Peace Prize 1950,,1/1,511,Individual,Ralph Bunche,1904-08-07,"Detroit, MI",United States of America,Male,Harvard University,"Cambridge, MA",United States of America,1971-12-09,"New York, NY",United States of America


In [27]:
# Nobel prize winners whose category is Physics or Chemistry
df[(df['category']=='Chemistry') | (df['category']=='Physics')]

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
0,1901,Chemistry,The Nobel Prize in Chemistry 1901,"""in recognition of the extraordinary services ...",1/1,160,Individual,Jacobus Henricus van 't Hoff,1852-08-30,Rotterdam,Netherlands,Male,Berlin University,Berlin,Germany,1911-03-01,Berlin,Germany
5,1901,Physics,The Nobel Prize in Physics 1901,"""in recognition of the extraordinary services ...",1/1,1,Individual,Wilhelm Conrad Röntgen,1845-03-27,Lennep (Remscheid),Prussia (Germany),Male,Munich University,Munich,Germany,1923-02-10,Munich,Germany
6,1902,Chemistry,The Nobel Prize in Chemistry 1902,"""in recognition of the extraordinary services ...",1/1,161,Individual,Hermann Emil Fischer,1852-10-09,Euskirchen,Prussia (Germany),Male,Berlin University,Berlin,Germany,1919-07-15,Berlin,Germany
11,1902,Physics,The Nobel Prize in Physics 1902,"""in recognition of the extraordinary service t...",1/2,2,Individual,Hendrik Antoon Lorentz,1853-07-18,Arnhem,Netherlands,Male,Leiden University,Leiden,Netherlands,1928-02-04,,Netherlands
12,1902,Physics,The Nobel Prize in Physics 1902,"""in recognition of the extraordinary service t...",1/2,3,Individual,Pieter Zeeman,1865-05-25,Zonnemaire,Netherlands,Male,Amsterdam University,Amsterdam,Netherlands,1943-10-09,Amsterdam,Netherlands
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
992,2023,Physics,The Nobel Prize in Physics 2023,"""for experimental methods that generate attose...",1/3,1027,Individual,Ferenc Krausz,1962-05-17,Mór,Hungary,Male,Max Planck Institute of Quantum Optics,Garching,Germany,,,
993,2023,Physics,The Nobel Prize in Physics 2023,"""for experimental methods that generate attose...",1/3,1028,Individual,Anne L’Huillier,1958-08-16,Paris,France,Female,Lund University,Lund,Sweden,,,
994,2023,Chemistry,The Nobel Prize in Chemistry 2023,"""for the discovery and synthesis of quantum dots""",1/3,1029,Individual,Moungi Bawendi,1961-00-00,Paris,France,Male,Massachusetts Institute of Technology (MIT),"Cambridge, MA",United States of America,,,
995,2023,Chemistry,The Nobel Prize in Chemistry 2023,"""for the discovery and synthesis of quantum dots""",1/3,1030,Individual,Louis Brus,1943-00-00,"Cleveland, OH",United States of America,Male,Columbia University,"New York, NY",United States of America,,,


In [28]:
# Exercise: write a filter whose condition uses both & and |



In [29]:
# Same 1920-1950 filter written with Series.between
year_in_range = df['year'].between(1920, 1950)
df[year_in_range]

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
97,1920,Chemistry,The Nobel Prize in Chemistry 1920,"""in recognition of his work in thermochemistry""",1/1,178,Individual,Walther Hermann Nernst,1864-06-25,Briesen,Prussia (Germany),Male,Berlin University,Berlin,Germany,1941-11-18,Muskau,Germany
98,1920,Literature,The Nobel Prize in Literature 1920,"""for his monumental work, <I>Growth of the Soi...",1/1,589,Individual,Knut Pedersen Hamsun,1859-08-04,Lom,Norway,Male,,,,1952-02-19,Grimstad,Norway
99,1920,Medicine,The Nobel Prize in Physiology or Medicine 1920,"""for his discovery of the capillary motor regu...",1/1,310,Individual,Schack August Steenberg Krogh,1874-11-15,Grenå,Denmark,Male,Copenhagen University,Copenhagen,Denmark,1949-09-13,Copenhagen,Denmark
100,1920,Peace,The Nobel Peace Prize 1920,,1/1,484,Individual,Léon Victor Auguste Bourgeois,1851-05-21,Paris,France,Male,,,,1925-09-29,Épernay,France
101,1920,Physics,The Nobel Prize in Physics 1920,"""in recognition of the service he has rendered...",1/1,25,Individual,Charles Edouard Guillaume,1861-02-15,Fleurier,Switzerland,Male,Bureau International des Poids et Mesures (Int...,Sèvres,France,1938-06-13,Sèvres,France
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253,1950,Medicine,The Nobel Prize in Physiology or Medicine 1950,"""for their discoveries relating to the hormone...",1/3,349,Individual,Edward Calvin Kendall,1886-03-08,"South Norwalk, CT",United States of America,Male,Mayo Clinic,"Rochester, MN",United States of America,1972-05-04,"Princeton, NJ",United States of America
254,1950,Medicine,The Nobel Prize in Physiology or Medicine 1950,"""for their discoveries relating to the hormone...",1/3,350,Individual,Tadeus Reichstein,1897-07-20,Wloclawek,Poland,Male,Basel University,Basel,Switzerland,1996-08-01,Basel,Switzerland
255,1950,Medicine,The Nobel Prize in Physiology or Medicine 1950,"""for their discoveries relating to the hormone...",1/3,351,Individual,Philip Showalter Hench,1896-02-28,"Pittsburgh, PA",United States of America,Male,Mayo Clinic,"Rochester, MN",United States of America,1965-03-30,Ocho Rios,Jamaica
256,1950,Peace,The Nobel Peace Prize 1950,,1/1,511,Individual,Ralph Bunche,1904-08-07,"Detroit, MI",United States of America,Male,Harvard University,"Cambridge, MA",United States of America,1971-12-09,"New York, NY",United States of America


##### Filtering with the help of row Indexes

In [32]:
# loc selects rows by index label; a label slice includes both endpoints (20 through 25)
df.loc[20:25]

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
20,1904,Chemistry,The Nobel Prize in Chemistry 1904,"""in recognition of his services in the discove...",1/1,163,Individual,Sir William Ramsay,1852-10-02,Glasgow,Scotland,Male,University College,London,United Kingdom,1916-07-23,High Wycombe,United Kingdom
21,1904,Literature,The Nobel Prize in Literature 1904,"""in recognition of the fresh originality and t...",1/2,573,Individual,Frédéric Mistral,1830-09-08,Maillane,France,Male,,,,1914-03-25,Maillane,France
22,1904,Literature,The Nobel Prize in Literature 1904,"""in recognition of the numerous and brilliant ...",1/2,574,Individual,José Echegaray y Eizaguirre,1832-04-19,Madrid,Spain,Male,,,,1916-09-04,Madrid,Spain
23,1904,Medicine,The Nobel Prize in Physiology or Medicine 1904,"""in recognition of his work on the physiology ...",1/1,296,Individual,Ivan Petrovich Pavlov,1849-09-14,Ryazan,Russia,Male,Military Medical Academy,St. Petersburg,Russia,1936-02-27,Leningrad,Russia
24,1904,Peace,The Nobel Peace Prize 1904,,1/1,467,Organization,Institut de droit international (Institute of ...,,,,,,,,,,
25,1904,Physics,The Nobel Prize in Physics 1904,"""for his investigations of the densities of th...",1/1,8,Individual,Lord Rayleigh (John William Strutt),1842-11-12,"Langford Grove, Maldon, Essex",United Kingdom,Male,Royal Institution of Great Britain,London,United Kingdom,1919-06-30,,United Kingdom


In [42]:
# loc with one row label and a list of column names returns that row's values as a Series
df.loc[250, ['sex','full_name','birth_date','birth_city']]

sex                              Male
full_name     Otto Paul Hermann Diels
birth_date                 1876-01-23
birth_city                    Hamburg
Name: 250, dtype: object

In [37]:
# loc with a single row label and a single column name returns one value
df.loc[78, 'prize']

'The Nobel Prize in Physics 1913'

In [45]:
# iloc selects by integer position; the end of each slice is excluded (rows 10-14, columns 1-4)
df.iloc[10:15,1:5]

Unnamed: 0,category,prize,motivation,prize_share
10,Peace,The Nobel Peace Prize 1902,,1/2
11,Physics,The Nobel Prize in Physics 1902,"""in recognition of the extraordinary service t...",1/2
12,Physics,The Nobel Prize in Physics 1902,"""in recognition of the extraordinary service t...",1/2
13,Chemistry,The Nobel Prize in Chemistry 1903,"""in recognition of the extraordinary services ...",1/1
14,Literature,The Nobel Prize in Literature 1903,"""as a tribute to his noble, magnificent and ve...",1/1


In [47]:
# loc with a label slice and a list of column names; the end label 15 is included
df.loc[10:15,["sex","full_name"]]

Unnamed: 0,sex,full_name
10,Male,Charles Albert Gobat
11,Male,Hendrik Antoon Lorentz
12,Male,Pieter Zeeman
13,Male,Svante August Arrhenius
14,Male,Bjørnstjerne Martinus Bjørnson
15,Male,Niels Ryberg Finsen


In [48]:
# Integer-position selection: rows 70-75 and the first four columns
df.iloc[70:76,0:4]

Unnamed: 0,year,category,prize,motivation
70,1912,Literature,The Nobel Prize in Literature 1912,"""primarily in recognition of his fruitful, var..."
71,1912,Medicine,The Nobel Prize in Physiology or Medicine 1912,"""in recognition of his work on vascular suture..."
72,1912,Peace,The Nobel Peace Prize 1912,
73,1912,Physics,The Nobel Prize in Physics 1912,"""for his invention of automatic regulators for..."
74,1913,Chemistry,The Nobel Prize in Chemistry 1913,"""in recognition of his work on the linkage of ..."
75,1913,Literature,The Nobel Prize in Literature 1913,"""because of his profoundly sensitive, fresh an..."


### Value Counts

In [50]:
# Number of rows per distinct value; missing values are left out by default
df.sex.value_counts()

sex
Male      905
Female     65
Name: count, dtype: int64

### Unique Values

In [51]:
# Distinct values found in the column
df.category.unique()

array(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Physics',
       'Economics'], dtype=object)

In [52]:
# Number of distinct categories, counted from the array returned by unique()
len(df.category.unique())

6

In [54]:
# unique() returns a NumPy array rather than a list or a Series
type(df.category.unique())

numpy.ndarray

##### nunique

In [87]:
# nunique() returns the number of distinct values directly
df.category.nunique()

6

### tolist()

In [57]:
# Convert the NumPy array of unique categories into a plain Python list
x = df.category.unique().tolist()
x

['Chemistry', 'Literature', 'Medicine', 'Peace', 'Physics', 'Economics']

In [58]:
# Confirm that x is a built-in list
type(x)

list

### Aggregate Functions

##### sum

In [59]:
# sum() adds up every value in the column (used here only to demonstrate the method)
df.year.sum()

1973721

In [62]:
# Column names of the Titanic sheet
# NOTE(review): the 'Unnamed: N' entries look like header-less spreadsheet columns - confirm
df_titanic.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked', 'Unnamed: 12',
       'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16',
       'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20',
       'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24',
       'Unnamed: 25'],
      dtype='object')

In [63]:
# Sum of the Survived column; presumably 0/1 flags, so this is the number of survivors (TODO confirm)
df_titanic.Survived.sum()

342

##### count

In [70]:
# count() returns the number of non-null values in the column
df_titanic['PassengerId'].count()

891

##### mean

In [71]:
# Arithmetic mean of the Age column; missing values are skipped by default
df_titanic['Age'].mean()

29.69911764705882

Other aggregate functions are called the same way: `median()`, `mode()` and `std()`.

### Null Values

In [74]:
# Boolean DataFrame of the same shape: True wherever a value is missing
df.isnull()

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_id,laureate_type,full_name,birth_date,birth_city,birth_country,sex,organization_name,organization_city,organization_country,death_date,death_city,death_country
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,True,False,False,False,False,False,False,False,False,True,True,True,False,False,False
4,False,False,False,True,False,False,False,False,False,False,False,False,True,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True
996,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,True,True
997,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True
998,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True


In [75]:
# True counts as 1, so summing the mask gives the number of missing values per column
df.isnull().sum()

year                      0
category                  0
prize                     0
motivation               88
prize_share               0
laureate_id               0
laureate_type             0
full_name                 0
birth_date               32
birth_city               36
birth_country            31
sex                      30
organization_name       264
organization_city       265
organization_country    265
death_date              404
death_city              421
death_country           415
dtype: int64

In [78]:
# isna() is an alias of isnull(); the counts are identical
df.isna().sum()

year                      0
category                  0
prize                     0
motivation               88
prize_share               0
laureate_id               0
laureate_type             0
full_name                 0
birth_date               32
birth_city               36
birth_country            31
sex                      30
organization_name       264
organization_city       265
organization_country    265
death_date              404
death_city              421
death_country           415
dtype: int64

##### Sort Values

In [86]:
df.isna().sum().sort_values(ascending = False) # columns with the most missing values first; ascending defaults to True

death_city              421
death_country           415
death_date              404
organization_country    265
organization_city       265
organization_name       264
motivation               88
birth_city               36
birth_date               32
birth_country            31
sex                      30
category                  0
full_name                 0
laureate_type             0
laureate_id               0
prize_share               0
prize                     0
year                      0
dtype: int64

### Date Column

In [72]:
# Re-check the dtypes: birth_date is stored as object (text), not as a datetime
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   year                  1000 non-null   int64 
 1   category              1000 non-null   object
 2   prize                 1000 non-null   object
 3   motivation            912 non-null    object
 4   prize_share           1000 non-null   object
 5   laureate_id           1000 non-null   int64 
 6   laureate_type         1000 non-null   object
 7   full_name             1000 non-null   object
 8   birth_date            968 non-null    object
 9   birth_city            964 non-null    object
 10  birth_country         969 non-null    object
 11  sex                   970 non-null    object
 12  organization_name     736 non-null    object
 13  organization_city     735 non-null    object
 14  organization_country  735 non-null    object
 15  death_date            596 non-null    o

### Deep Copy vs Shallow Copy

`pd.DataFrame.copy()`

```df_titanic.copy(deep=True or False)```

- When `deep=True` (the default), a new object is created with a copy of the calling object's data and indices. Modifications to the data or indices of the copy are not reflected in the original object (see the Notes section of the pandas `DataFrame.copy` documentation).
- When `deep=False`, a new object is created without copying the calling object's data or index (only references to the data and index are copied). Any changes to the data of the original are reflected in the shallow copy (and vice versa).

In [104]:
# Independent copy: deep=True (the default) copies the data and the index
deep_copy = df_titanic.copy(deep=True)

In [105]:
# Display the copy; pandas abbreviates the middle rows and columns of a large frame
deep_copy

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,1,0,3,"Braund, Mr. Owen Harris",male,50,1,0,A/5 21171,7.2500,...,,,,,,,,,,
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,50,1,0,PC 17599,71.2833,...,,,,,,,,,,
2,3,1,3,"Heikkinen, Miss. Laina",female,50,0,0,STON/O2. 3101282,7.9250,...,,,,,,,,,,
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,50,1,0,113803,53.1000,...,,,,,,,,,,
4,5,0,3,"Allen, Mr. William Henry",male,50,0,0,373450,8.0500,...,,,,,,,,,,We
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,50,0,0,211536,13.0000,...,,,,,,,,,,
887,888,1,1,"Graham, Miss. Margaret Edith",female,50,0,0,112053,30.0000,...,,,,,,,,,,
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,50,1,2,W./C. 6607,23.4500,...,,,,,,,,,,
889,890,1,1,"Behr, Mr. Karl Howell",male,50,0,0,111369,30.0000,...,,,,,,,,,,


In [106]:
# A plain assignment (shallow_copy = df_titanic) would only bind a second name
# to the very same DataFrame object. copy(deep=False) creates a new DataFrame
# object that still shares the underlying data and index with df_titanic.
# NOTE: when pandas Copy-on-Write is enabled, writes to one object are no
# longer reflected in the other.
shallow_copy = df_titanic.copy(deep=False)

In [107]:
# Preview the first five rows of shallow_copy
shallow_copy.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,1,0,3,"Braund, Mr. Owen Harris",male,50,1,0,A/5 21171,7.25,...,,,,,,,,,,
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,50,1,0,PC 17599,71.2833,...,,,,,,,,,,
2,3,1,3,"Heikkinen, Miss. Laina",female,50,0,0,STON/O2. 3101282,7.925,...,,,,,,,,,,
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,50,1,0,113803,53.1,...,,,,,,,,,,
4,5,0,3,"Allen, Mr. William Henry",male,50,0,0,373450,8.05,...,,,,,,,,,,We


### Handling Null Values
We make the changes on the deep copy so that the original data is not disturbed.