# Working on Columns

In [42]:
import numpy as np
import pandas as pd

In [44]:
# Load 'gapminder_full.csv' into a DataFrame; the path is relative, so the file
# must sit in the notebook's working directory.
df = pd.read_csv('gapminder_full.csv')

In [45]:
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
0,Afghanistan,1952,8425333,Asia,28.801,779.445314
1,Afghanistan,1957,9240934,Asia,30.332,820.853030
2,Afghanistan,1962,10267083,Asia,31.997,853.100710
3,Afghanistan,1967,11537966,Asia,34.020,836.197138
4,Afghanistan,1972,13079460,Asia,36.088,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623


## Accessing a column

In [47]:
df['country']

0       Afghanistan
1       Afghanistan
2       Afghanistan
3       Afghanistan
4       Afghanistan
           ...     
1699       Zimbabwe
1700       Zimbabwe
1701       Zimbabwe
1702       Zimbabwe
1703       Zimbabwe
Name: country, Length: 1704, dtype: object

In [48]:
# using dot notation
# dot notation is not recommended:
#     a column name can be confused with an existing DataFrame method/attribute name
#     column names containing spaces are not supported
#     multiple columns can't be retrieved at once

In [49]:
df.country

0       Afghanistan
1       Afghanistan
2       Afghanistan
3       Afghanistan
4       Afghanistan
           ...     
1699       Zimbabwe
1700       Zimbabwe
1701       Zimbabwe
1702       Zimbabwe
1703       Zimbabwe
Name: country, Length: 1704, dtype: object

In [50]:
df.life_exp

0       28.801
1       30.332
2       31.997
3       34.020
4       36.088
         ...  
1699    62.351
1700    60.377
1701    46.809
1702    39.989
1703    43.487
Name: life_exp, Length: 1704, dtype: float64

In [51]:
df.columns

Index(['country', 'year', 'population', 'continent', 'life_exp', 'gdp_cap'], dtype='object')

In [54]:
df[['country','year']]

Unnamed: 0,country,year
0,Afghanistan,1952
1,Afghanistan,1957
2,Afghanistan,1962
3,Afghanistan,1967
4,Afghanistan,1972
...,...,...
1699,Zimbabwe,1987
1700,Zimbabwe,1992
1701,Zimbabwe,1997
1702,Zimbabwe,2002


## Dropping a column

In [55]:
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
0,Afghanistan,1952,8425333,Asia,28.801,779.445314
1,Afghanistan,1957,9240934,Asia,30.332,820.853030
2,Afghanistan,1962,10267083,Asia,31.997,853.100710
3,Afghanistan,1967,11537966,Asia,34.020,836.197138
4,Afghanistan,1972,13079460,Asia,36.088,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623


In [60]:
# Show df without the 'country' column (the result is displayed, not stored)
df.drop(columns='country')

Unnamed: 0,year,population,continent,life_exp,gdp_cap
0,1952,8425333,Asia,28.801,779.445314
1,1957,9240934,Asia,30.332,820.853030
2,1962,10267083,Asia,31.997,853.100710
3,1967,11537966,Asia,34.020,836.197138
4,1972,13079460,Asia,36.088,739.981106
...,...,...,...,...,...
1699,1987,9216418,Africa,62.351,706.157306
1700,1992,10704340,Africa,60.377,693.420786
1701,1997,11404948,Africa,46.809,792.449960
1702,2002,11926563,Africa,39.989,672.038623


In [59]:
# drop() returned a new DataFrame and left df untouched; the change only
# persists if the parameter inplace=True is set (or the result is assigned back).
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
0,Afghanistan,1952,8425333,Asia,28.801,779.445314
1,Afghanistan,1957,9240934,Asia,30.332,820.853030
2,Afghanistan,1962,10267083,Asia,31.997,853.100710
3,Afghanistan,1967,11537966,Asia,34.020,836.197138
4,Afghanistan,1972,13079460,Asia,36.088,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623


In [62]:
# To drop several columns at once, their names have to be passed inside a list
df.drop(columns=['country', 'year'])

Unnamed: 0,population,continent,life_exp,gdp_cap
0,8425333,Asia,28.801,779.445314
1,9240934,Asia,30.332,820.853030
2,10267083,Asia,31.997,853.100710
3,11537966,Asia,34.020,836.197138
4,13079460,Asia,36.088,739.981106
...,...,...,...,...
1699,9216418,Africa,62.351,706.157306
1700,10704340,Africa,60.377,693.420786
1701,11404948,Africa,46.809,792.449960
1702,11926563,Africa,39.989,672.038623


In [63]:
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap
0,Afghanistan,1952,8425333,Asia,28.801,779.445314
1,Afghanistan,1957,9240934,Asia,30.332,820.853030
2,Afghanistan,1962,10267083,Asia,31.997,853.100710
3,Afghanistan,1967,11537966,Asia,34.020,836.197138
4,Afghanistan,1972,13079460,Asia,36.088,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623


## Adding a new column

In [67]:
# Arithmetic on a column is applied element-wise: every gdp_cap value is doubled
df['new_gdp'] = 2 * df['gdp_cap']

In [68]:
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap,new_gdp
0,Afghanistan,1952,8425333,Asia,28.801,779.445314,1558.890629
1,Afghanistan,1957,9240934,Asia,30.332,820.853030,1641.706059
2,Afghanistan,1962,10267083,Asia,31.997,853.100710,1706.201420
3,Afghanistan,1967,11537966,Asia,34.020,836.197138,1672.394276
4,Afghanistan,1972,13079460,Asia,36.088,739.981106,1479.962212
...,...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306,1412.314612
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786,1386.841571
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960,1584.899921
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623,1344.077245


## Adding a custom index

In [69]:
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap,new_gdp
0,Afghanistan,1952,8425333,Asia,28.801,779.445314,1558.890629
1,Afghanistan,1957,9240934,Asia,30.332,820.853030,1641.706059
2,Afghanistan,1962,10267083,Asia,31.997,853.100710,1706.201420
3,Afghanistan,1967,11537966,Asia,34.020,836.197138,1672.394276
4,Afghanistan,1972,13079460,Asia,36.088,739.981106,1479.962212
...,...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306,1412.314612
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786,1386.841571
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960,1584.899921
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623,1344.077245


In [70]:
df.index

RangeIndex(start=0, stop=1704, step=1)

In [71]:
# creating a new column

In [79]:
# Explicit loop over the same range (prints each value), kept commented out
# for comparison with the comprehension below:
# for i in range(1,1705):
#     print(i)


# using a list comprehension: builds the list 1..1704, one value per row of df
[i for i in range(1,1705)]


[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185

In [74]:
# Number the rows 1..N. N is taken from len(df) instead of a hard-coded 1705,
# so the assignment does not fail with a length mismatch if the CSV row count changes.
df['custom_coln'] = list(range(1, len(df) + 1))

In [75]:
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap,new_gdp,custom_coln
0,Afghanistan,1952,8425333,Asia,28.801,779.445314,1558.890629,1
1,Afghanistan,1957,9240934,Asia,30.332,820.853030,1641.706059,2
2,Afghanistan,1962,10267083,Asia,31.997,853.100710,1706.201420,3
3,Afghanistan,1967,11537966,Asia,34.020,836.197138,1672.394276,4
4,Afghanistan,1972,13079460,Asia,36.088,739.981106,1479.962212,5
...,...,...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306,1412.314612,1700
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786,1386.841571,1701
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960,1584.899921,1702
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623,1344.077245,1703


In [80]:
# Overwrite 'custom_coln' with the square of the 1-based row number (1, 4, 9, ...).
# The range is sized from len(df) rather than a hard-coded 1705 so it always
# matches the number of rows.
df['custom_coln'] = [i * i for i in range(1, len(df) + 1)]

In [81]:
df

Unnamed: 0,country,year,population,continent,life_exp,gdp_cap,new_gdp,custom_coln
0,Afghanistan,1952,8425333,Asia,28.801,779.445314,1558.890629,1
1,Afghanistan,1957,9240934,Asia,30.332,820.853030,1641.706059,4
2,Afghanistan,1962,10267083,Asia,31.997,853.100710,1706.201420,9
3,Afghanistan,1967,11537966,Asia,34.020,836.197138,1672.394276,16
4,Afghanistan,1972,13079460,Asia,36.088,739.981106,1479.962212,25
...,...,...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418,Africa,62.351,706.157306,1412.314612,2890000
1700,Zimbabwe,1992,10704340,Africa,60.377,693.420786,1386.841571,2893401
1701,Zimbabwe,1997,11404948,Africa,46.809,792.449960,1584.899921,2896804
1702,Zimbabwe,2002,11926563,Africa,39.989,672.038623,1344.077245,2900209


In [83]:
# Use the values of 'custom_coln' as the row labels. The column itself is kept,
# so the same values appear both as the index and as a regular column.
df.index = df['custom_coln']

In [84]:
df.index

Int64Index([      1,       4,       9,      16,      25,      36,      49,
                 64,      81,     100,
            ...
            2873025, 2876416, 2879809, 2883204, 2886601, 2890000, 2893401,
            2896804, 2900209, 2903616],
           dtype='int64', name='custom_coln', length=1704)

In [85]:
df

Unnamed: 0_level_0,country,year,population,continent,life_exp,gdp_cap,new_gdp,custom_coln
custom_coln,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Afghanistan,1952,8425333,Asia,28.801,779.445314,1558.890629,1
4,Afghanistan,1957,9240934,Asia,30.332,820.853030,1641.706059,4
9,Afghanistan,1962,10267083,Asia,31.997,853.100710,1706.201420,9
16,Afghanistan,1967,11537966,Asia,34.020,836.197138,1672.394276,16
25,Afghanistan,1972,13079460,Asia,36.088,739.981106,1479.962212,25
...,...,...,...,...,...,...,...,...
2890000,Zimbabwe,1987,9216418,Africa,62.351,706.157306,1412.314612,2890000
2893401,Zimbabwe,1992,10704340,Africa,60.377,693.420786,1386.841571,2893401
2896804,Zimbabwe,1997,11404948,Africa,46.809,792.449960,1584.899921,2896804
2900209,Zimbabwe,2002,11926563,Africa,39.989,672.038623,1344.077245,2900209


In [87]:
# Remove 'custom_coln' from the column data; its values stay available as the index.
# The result is only displayed here, not assigned back to df.
df.drop(columns='custom_coln')

Unnamed: 0_level_0,country,year,population,continent,life_exp,gdp_cap,new_gdp
custom_coln,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Afghanistan,1952,8425333,Asia,28.801,779.445314,1558.890629
4,Afghanistan,1957,9240934,Asia,30.332,820.853030,1641.706059
9,Afghanistan,1962,10267083,Asia,31.997,853.100710,1706.201420
16,Afghanistan,1967,11537966,Asia,34.020,836.197138,1672.394276
25,Afghanistan,1972,13079460,Asia,36.088,739.981106,1479.962212
...,...,...,...,...,...,...,...
2890000,Zimbabwe,1987,9216418,Africa,62.351,706.157306,1412.314612
2893401,Zimbabwe,1992,10704340,Africa,60.377,693.420786,1386.841571
2896804,Zimbabwe,1997,11404948,Africa,46.809,792.449960,1584.899921
2900209,Zimbabwe,2002,11926563,Africa,39.989,672.038623,1344.077245
