### Creating a dataframe

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame({
    "Population":[23.44,54.22,77.29,88.82,32.67,66.66,777.77],
    "GDP":[18273,217278,38283,7446,73156,5471,7277],
    "Surface Area":[210,17317,8318,6573,81297,767,878],
    "HDI":[0.912,0.721,0.221,0.887,0.22,0.443,0.412],
    "Continent":["America","Europe","Europe","Asia","America","Africa","Europe"]
},columns=["Population","GDP","Surface Area","HDI","Continent"])

#### To view your DataFrame 

In [3]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,23.44,18273,210,0.912,America
1,54.22,217278,17317,0.721,Europe
2,77.29,38283,8318,0.221,Europe
3,88.82,7446,6573,0.887,Asia
4,32.67,73156,81297,0.22,America
5,66.66,5471,767,0.443,Africa
6,777.77,7277,878,0.412,Europe


#### Similar to series we can assign an index to a DataFrame

In [4]:
df.index = ["Canada","France","Germany","Thailand","Cuba","South Africa","Russia"]

In [5]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.44,18273,210,0.912,America
France,54.22,217278,17317,0.721,Europe
Germany,77.29,38283,8318,0.221,Europe
Thailand,88.82,7446,6573,0.887,Asia
Cuba,32.67,73156,81297,0.22,America
South Africa,66.66,5471,767,0.443,Africa
Russia,777.77,7277,878,0.412,Europe


#### DataFrame info tools

In [6]:
df.info() #give info about structure of your DataFrame 
          #so you can find null/invalid values, Types, etc

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to Russia
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [7]:
df.dtypes.value_counts()

float64    2
int64      2
object     1
dtype: int64

In [8]:
df.size

35

In [9]:
df.shape

(7, 5)

#### To get summary statistics for numeric values in your table

In [10]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,160.124286,52454.857143,16480.0,0.545143
std,273.346705,76636.333266,29216.846236,0.294766
min,23.44,5471.0,210.0,0.22
25%,43.445,7361.5,822.5,0.3165
50%,66.66,18273.0,6573.0,0.443
75%,83.055,55719.5,12817.5,0.804
max,777.77,217278.0,81297.0,0.912


In [11]:
df.head() #Gives first five columns of you DataFrame

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.44,18273,210,0.912,America
France,54.22,217278,17317,0.721,Europe
Germany,77.29,38283,8318,0.221,Europe
Thailand,88.82,7446,6573,0.887,Asia
Cuba,32.67,73156,81297,0.22,America


In [12]:
df.tail() #Gives last five columns of you DataFrame

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Germany,77.29,38283,8318,0.221,Europe
Thailand,88.82,7446,6573,0.887,Asia
Cuba,32.67,73156,81297,0.22,America
South Africa,66.66,5471,767,0.443,Africa
Russia,777.77,7277,878,0.412,Europe


In [13]:
population = df["Population"]

In [14]:
population.min(),population.max()

(23.44, 777.77)

In [15]:
population.sum()

1120.87

In [16]:
len(population)

7

In [17]:
population.sum()/len(population)

160.1242857142857

In [18]:
population.quantile(0.25) #25 quartile

43.445

In [19]:
population.mean()

160.1242857142857

In [20]:
population.quantile([0.2,0.4,0.6,0.8,1])

0.2     36.980
0.4     59.196
0.6     73.038
0.8     86.514
1.0    777.770
Name: Population, dtype: float64

In [21]:
df.head()

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,23.44,18273,210,0.912,America
France,54.22,217278,17317,0.721,Europe
Germany,77.29,38283,8318,0.221,Europe
Thailand,88.82,7446,6573,0.887,Asia
Cuba,32.67,73156,81297,0.22,America


In [22]:
df.groupby(["GDP"]).mean()

Unnamed: 0_level_0,Population,Surface Area,HDI
GDP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5471,66.66,767.0,0.443
7277,777.77,878.0,0.412
7446,88.82,6573.0,0.887
18273,23.44,210.0,0.912
38283,77.29,8318.0,0.221
73156,32.67,81297.0,0.22
217278,54.22,17317.0,0.721


In [23]:
df["count"]=1
df.groupby(["Continent"]).count()["count"]

Continent
Africa     1
America    2
Asia       1
Europe     3
Name: count, dtype: int64

### Indexing, Selection and Slicing

In [24]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


### loc and iloc (Remember)
#### loc let's you select by index and iloc let's you select by sequential position i.e. first row last row, etc
#### df lets you select by column

In [25]:
df.loc["Canada"] 

Population        23.44
GDP               18273
Surface Area        210
HDI               0.912
Continent       America
count                 1
Name: Canada, dtype: object

In [26]:
df.iloc[0]

Population        23.44
GDP               18273
Surface Area        210
HDI               0.912
Continent       America
count                 1
Name: Canada, dtype: object

In [27]:
df["Population"] #selecting entire column

Canada           23.44
France           54.22
Germany          77.29
Thailand         88.82
Cuba             32.67
South Africa     66.66
Russia          777.77
Name: Population, dtype: float64

#### Here all the results are in series

In [28]:
df.loc["Canada":"Cuba"]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1


In [29]:
df.loc["Canada":"Cuba","Population"]

Canada      23.44
France      54.22
Germany     77.29
Thailand    88.82
Cuba        32.67
Name: Population, dtype: float64

In [30]:
df.loc["Canada":"Cuba",["Population","GDP"]]

Unnamed: 0,Population,GDP
Canada,23.44,18273
France,54.22,217278
Germany,77.29,38283
Thailand,88.82,7446
Cuba,32.67,73156


In [31]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


#### Similarly for iloc

In [32]:
df.iloc[0:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1


In [33]:
df.iloc[[0,1,3]]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1


In [34]:
df.iloc[0:3,0]

Canada     23.44
France     54.22
Germany    77.29
Name: Population, dtype: float64

In [35]:
df.iloc[0:3,[0,1]]

Unnamed: 0,Population,GDP
Canada,23.44,18273
France,54.22,217278
Germany,77.29,38283


### Conditional Selection

In [36]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


In [37]:
df["Population"]>70

Canada          False
France          False
Germany          True
Thailand         True
Cuba            False
South Africa    False
Russia           True
Name: Population, dtype: bool

In [38]:
df.loc[df["Population"]>70]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Russia,777.77,7277,878,0.412,Europe,1


In [39]:
df.loc[df["Population"]>70,["Population","GDP"]]

Unnamed: 0,Population,GDP
Germany,77.29,38283
Thailand,88.82,7446
Russia,777.77,7277


In [40]:
df.head()

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1


In [41]:
df.loc[(df["Population"]>70) & (df["HDI"]>0.5)]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Thailand,88.82,7446,6573,0.887,Asia,1


In [42]:
df.loc[(df["Population"]>70) | (df["HDI"]>0.5)]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Russia,777.77,7277,878,0.412,Europe,1


In [43]:
df.loc[df["Continent"].str.contains("Europe")]
# To get reverse of this just add (~)
#df.loc[~df["Continent"].str.contains("Europe")]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Russia,777.77,7277,878,0.412,Europe,1


### Using regex 
learn regex

In [44]:
import re #regular expressions

df.loc[df["Continent"].str.contains("europe|asia",flags=re.I,regex=True)]
#Here flags=re.I ignores case sensitivity

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Russia,777.77,7277,878,0.412,Europe,1


### Drop (i.e. drop x values and give the others)

In [45]:
df.drop("Canada") #here we aren't actually removing values as
                  #these operations are all immutable 
                  #i.e. do not change the DataFrame as 99% of Pandas operations are immutable

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


In [46]:
df.drop(["Canada","Cuba"])

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


In [47]:
#you can also drop columns
df.drop(columns=["Surface Area","HDI"])

Unnamed: 0,Population,GDP,Continent,count
Canada,23.44,18273,America,1
France,54.22,217278,Europe,1
Germany,77.29,38283,Europe,1
Thailand,88.82,7446,Asia,1
Cuba,32.67,73156,America,1
South Africa,66.66,5471,Africa,1
Russia,777.77,7277,Europe,1


### Operations

In [48]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


In [49]:
df[["Population","GDP"]]/100

Unnamed: 0,Population,GDP
Canada,0.2344,182.73
France,0.5422,2172.78
Germany,0.7729,382.83
Thailand,0.8882,74.46
Cuba,0.3267,731.56
South Africa,0.6666,54.71
Russia,7.7777,72.77


#### Performing operation in a bunch using series

In [50]:
remove = pd.Series([-10,-0.2],index=["Population","GDP"])
remove

Population   -10.0
GDP           -0.2
dtype: float64

In [51]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


In [52]:
df[["Population","GDP"]]+remove

Unnamed: 0,Population,GDP
Canada,13.44,18272.8
France,44.22,217277.8
Germany,67.29,38282.8
Thailand,78.82,7445.8
Cuba,22.67,73155.8
South Africa,56.66,5470.8
Russia,767.77,7276.8


### Modifying DataFrame 
(whenever you have "=" symbol you are modifying the DataFrame
&& Use inplace = True to permanently change)
#### Adding a new column

In [53]:
a = pd.Series(["Thai","French","German"],
             index=["Thailand","France","Germany"],
             name = "Language")
a

Thailand      Thai
France      French
Germany     German
Name: Language, dtype: object

In [54]:
df["Language"]=a
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count,Language
Canada,23.44,18273,210,0.912,America,1,
France,54.22,217278,17317,0.721,Europe,1,French
Germany,77.29,38283,8318,0.221,Europe,1,German
Thailand,88.82,7446,6573,0.887,Asia,1,Thai
Cuba,32.67,73156,81297,0.22,America,1,
South Africa,66.66,5471,767,0.443,Africa,1,
Russia,777.77,7277,878,0.412,Europe,1,


#### Dropping columns

In [55]:
df.drop(columns='Language')
#df.drop(columns='Language', inplace=True) to permenantly remove the column

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count
Canada,23.44,18273,210,0.912,America,1
France,54.22,217278,17317,0.721,Europe,1
Germany,77.29,38283,8318,0.221,Europe,1
Thailand,88.82,7446,6573,0.887,Asia,1
Cuba,32.67,73156,81297,0.22,America,1
South Africa,66.66,5471,767,0.443,Africa,1
Russia,777.77,7277,878,0.412,Europe,1


#### Replacing all values in a column

In [56]:
df["Language"]="UngaBunga"
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count,Language
Canada,23.44,18273,210,0.912,America,1,UngaBunga
France,54.22,217278,17317,0.721,Europe,1,UngaBunga
Germany,77.29,38283,8318,0.221,Europe,1,UngaBunga
Thailand,88.82,7446,6573,0.887,Asia,1,UngaBunga
Cuba,32.67,73156,81297,0.22,America,1,UngaBunga
South Africa,66.66,5471,767,0.443,Africa,1,UngaBunga
Russia,777.77,7277,878,0.412,Europe,1,UngaBunga


#### Renaming Columns (immutable)

In [57]:
df.rename(columns={"HDI":"Human Development Index", "Population":"Pop"},
         index={"Canada":"USA","Cuba":"Mexico"})

Unnamed: 0,Pop,GDP,Surface Area,Human Development Index,Continent,count,Language
USA,23.44,18273,210,0.912,America,1,UngaBunga
France,54.22,217278,17317,0.721,Europe,1,UngaBunga
Germany,77.29,38283,8318,0.221,Europe,1,UngaBunga
Thailand,88.82,7446,6573,0.887,Asia,1,UngaBunga
Mexico,32.67,73156,81297,0.22,America,1,UngaBunga
South Africa,66.66,5471,767,0.443,Africa,1,UngaBunga
Russia,777.77,7277,878,0.412,Europe,1,UngaBunga


In [58]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,count,Language
Canada,23.44,18273,210,0.912,America,1,UngaBunga
France,54.22,217278,17317,0.721,Europe,1,UngaBunga
Germany,77.29,38283,8318,0.221,Europe,1,UngaBunga
Thailand,88.82,7446,6573,0.887,Asia,1,UngaBunga
Cuba,32.67,73156,81297,0.22,America,1,UngaBunga
South Africa,66.66,5471,767,0.443,Africa,1,UngaBunga
Russia,777.77,7277,878,0.412,Europe,1,UngaBunga


#### Use inplace = True to permanently change

In [59]:
df.rename(columns={"HDI":"Human Development Index", "Population":"Pop"},
         index={"Canada":"USA","Cuba":"Mexico"},inplace = "True")

In [60]:
df

Unnamed: 0,Pop,GDP,Surface Area,Human Development Index,Continent,count,Language
USA,23.44,18273,210,0.912,America,1,UngaBunga
France,54.22,217278,17317,0.721,Europe,1,UngaBunga
Germany,77.29,38283,8318,0.221,Europe,1,UngaBunga
Thailand,88.82,7446,6573,0.887,Asia,1,UngaBunga
Mexico,32.67,73156,81297,0.22,America,1,UngaBunga
South Africa,66.66,5471,767,0.443,Africa,1,UngaBunga
Russia,777.77,7277,878,0.412,Europe,1,UngaBunga


In [61]:
df.rename(index=str.upper) #For upper case

Unnamed: 0,Pop,GDP,Surface Area,Human Development Index,Continent,count,Language
USA,23.44,18273,210,0.912,America,1,UngaBunga
FRANCE,54.22,217278,17317,0.721,Europe,1,UngaBunga
GERMANY,77.29,38283,8318,0.221,Europe,1,UngaBunga
THAILAND,88.82,7446,6573,0.887,Asia,1,UngaBunga
MEXICO,32.67,73156,81297,0.22,America,1,UngaBunga
SOUTH AFRICA,66.66,5471,767,0.443,Africa,1,UngaBunga
RUSSIA,777.77,7277,878,0.412,Europe,1,UngaBunga


#### Creating columns from other columns

In [62]:
#To create a new column of GDP/Capita we can - 
df["GDP per Capita"] = df["GDP"]/df["Pop"]
df

Unnamed: 0,Pop,GDP,Surface Area,Human Development Index,Continent,count,Language,GDP per Capita
USA,23.44,18273,210,0.912,America,1,UngaBunga,779.564846
France,54.22,217278,17317,0.721,Europe,1,UngaBunga,4007.340465
Germany,77.29,38283,8318,0.221,Europe,1,UngaBunga,495.316341
Thailand,88.82,7446,6573,0.887,Asia,1,UngaBunga,83.83247
Mexico,32.67,73156,81297,0.22,America,1,UngaBunga,2239.240894
South Africa,66.66,5471,767,0.443,Africa,1,UngaBunga,82.073207
Russia,777.77,7277,878,0.412,Europe,1,UngaBunga,9.356236


In [63]:
#adding GDP and Surface Area
df["Total"]=df.iloc[:,1:3].sum(axis=1)
#(First colon specifies we want all the rows
# To add horizontally we need to specify axis=1)
df.head()

Unnamed: 0,Pop,GDP,Surface Area,Human Development Index,Continent,count,Language,GDP per Capita,Total
USA,23.44,18273,210,0.912,America,1,UngaBunga,779.564846,18483
France,54.22,217278,17317,0.721,Europe,1,UngaBunga,4007.340465,234595
Germany,77.29,38283,8318,0.221,Europe,1,UngaBunga,495.316341,46601
Thailand,88.82,7446,6573,0.887,Asia,1,UngaBunga,83.83247,14019
Mexico,32.67,73156,81297,0.22,America,1,UngaBunga,2239.240894,154453


#### Rearranging columns

In [64]:
#To put total column after surface area
df = df[["Pop","GDP","Surface Area","Total","Human Development Index","Continent","Language","GDP per Capita"]]
df.head()

Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
USA,23.44,18273,210,18483,0.912,America,UngaBunga,779.564846
France,54.22,217278,17317,234595,0.721,Europe,UngaBunga,4007.340465
Germany,77.29,38283,8318,46601,0.221,Europe,UngaBunga,495.316341
Thailand,88.82,7446,6573,14019,0.887,Asia,UngaBunga,83.83247
Mexico,32.67,73156,81297,154453,0.22,America,UngaBunga,2239.240894


#### Creating new rows
immutable

In [65]:
df.append(pd.Series({"Population":300,"GDP":212,"Surface Area":2184,"Continent":"Asia"},name="China"))

  df.append(pd.Series({"Population":300,"GDP":212,"Surface Area":2184,"Continent":"Asia"},name="China"))


Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita,Population
USA,23.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846,
France,54.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465,
Germany,77.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341,
Thailand,88.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247,
Mexico,32.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894,
South Africa,66.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207,
Russia,777.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236,
China,,212,2184,,,Asia,,,300.0


permanent

In [66]:
df.loc["China"]=pd.Series({"Population":300,"GDP":212,"Surface Area":2184,"Continent":"Asia"})
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc["China"]=pd.Series({"Population":300,"GDP":212,"Surface Area":2184,"Continent":"Asia"})


Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
USA,23.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846
France,54.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465
Germany,77.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341
Thailand,88.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247
Mexico,32.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894
South Africa,66.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207
Russia,777.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236
China,,212,2184,,,Asia,,


#### Dropping rows

In [67]:
df.drop("China",inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop("China",inplace=True)


In [68]:
df

Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
USA,23.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846
France,54.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465
Germany,77.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341
Thailand,88.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247
Mexico,32.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894
South Africa,66.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207
Russia,777.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236


### Iterating through rows

In [69]:
for index, row in df.iterrows():
    print(index,row)

USA Pop                             23.44
GDP                             18273
Surface Area                      210
Total                         18483.0
Human Development Index         0.912
Continent                     America
Language                    UngaBunga
GDP per Capita             779.564846
Name: USA, dtype: object
France Pop                              54.22
GDP                             217278
Surface Area                     17317
Total                         234595.0
Human Development Index          0.721
Continent                       Europe
Language                     UngaBunga
GDP per Capita             4007.340465
Name: France, dtype: object
Germany Pop                             77.29
GDP                             38283
Surface Area                     8318
Total                         46601.0
Human Development Index         0.221
Continent                      Europe
Language                    UngaBunga
GDP per Capita             495.316341
Name: Ge

### Sorting data

In [70]:
df.sort_values("Continent",ascending=True)

Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
South Africa,66.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207
USA,23.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846
Mexico,32.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894
Thailand,88.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247
France,54.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465
Germany,77.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341
Russia,777.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236


In [71]:
df.sort_values(["Continent","GDP"],ascending=True)

Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
South Africa,66.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207
USA,23.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846
Mexico,32.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894
Thailand,88.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247
Russia,777.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236
Germany,77.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341
France,54.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465


In [72]:
df.sort_values(["Continent","GDP"],ascending=[1,0])
#Ascending for continent and descending for GDP

Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
South Africa,66.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207
Mexico,32.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894
USA,23.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846
Thailand,88.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247
France,54.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465
Germany,77.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341
Russia,777.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236


### Lambda Function
An anonymous function which we can pass in instantly without defining a name or any thing like a full traditional function.

#### lambda x:x
#### (keyword)(BoundVariable):(Body)

In [73]:
df.rename(index=lambda x:x.lower())

Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
usa,23.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846
france,54.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465
germany,77.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341
thailand,88.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247
mexico,32.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894
south africa,66.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207
russia,777.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236


#### Lambda with apply
We can use the apply() function to apply the lambda function to both rows and columns of a dataframe

In [74]:
df["Pop"]=df["Pop"].apply(lambda x:x+100)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Pop"]=df["Pop"].apply(lambda x:x+100)


Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita
USA,123.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846
France,154.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465
Germany,177.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341
Thailand,188.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247
Mexico,132.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894
South Africa,166.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207
Russia,877.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236


#### Conditional statements using Lambda

In [75]:
df["Size"]=df["Pop"].apply(lambda x:"Big"if x>150 else "Small")

In [76]:
df

Unnamed: 0,Pop,GDP,Surface Area,Total,Human Development Index,Continent,Language,GDP per Capita,Size
USA,123.44,18273,210,18483.0,0.912,America,UngaBunga,779.564846,Small
France,154.22,217278,17317,234595.0,0.721,Europe,UngaBunga,4007.340465,Big
Germany,177.29,38283,8318,46601.0,0.221,Europe,UngaBunga,495.316341,Big
Thailand,188.82,7446,6573,14019.0,0.887,Asia,UngaBunga,83.83247,Big
Mexico,132.67,73156,81297,154453.0,0.22,America,UngaBunga,2239.240894,Small
South Africa,166.66,5471,767,6238.0,0.443,Africa,UngaBunga,82.073207,Big
Russia,877.77,7277,878,8155.0,0.412,Europe,UngaBunga,9.356236,Big


### Saving our DataFrame
#### To csv

In [77]:
#df.to_csv("modified.csv")
#To remove index
#df.to_csv("modified.csv",index=False)

#### To Excel

In [78]:
#df.to_excel("modified.xlsx",index=False)

#### To text

In [79]:
#df.to_csv("modified.txt",index=False,sep="\t")
#If \t is not used it is sperated by default by commas(,)