In [25]:
# importing modules
import pandas as pd
import numpy as np

In [2]:
# reading the tab-separated country dataset into a dataframe
data = pd.read_csv("./country_data.tsv", sep="\t")

In [3]:
# previewing the first ten rows of the loaded data
data.head(n=10)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106
5,Afghanistan,Asia,1977,38.438,14880372,786.11336
6,Afghanistan,Asia,1982,39.854,12881816,978.011439
7,Afghanistan,Asia,1987,40.822,13867957,852.395945
8,Afghanistan,Asia,1992,41.674,16317921,649.341395
9,Afghanistan,Asia,1997,41.763,22227415,635.341351


In [4]:
# summary of the dataframe: index range, column names, non-null counts, dtypes and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
dtypes: float64(2), int64(2), object(2)
memory usage: 80.0+ KB


<h2>Series</h2>

In [7]:
# building a series from a plain list; no index is supplied here
series_1 = pd.Series(data=[4, 7, -5, 3])

In [8]:
# displaying the series: index labels on the left, values on the right
series_1

0    4
1    7
2   -5
3    3
dtype: int64

In [9]:
# getting the values of the series as a NumPy array
series_1.values

array([ 4,  7, -5,  3], dtype=int64)

In [10]:
# getting the index of the series; none was given at construction, so it is a default RangeIndex
series_1.index

RangeIndex(start=0, stop=4, step=1)

In [11]:
# series with explicit string labels as its index
series_with_index = pd.Series(data=[4, 7, -5, 3], index=list("dbac"))
series_with_index

d    4
b    7
a   -5
c    3
dtype: int64

In [12]:
# the index now holds the string labels passed at construction
series_with_index.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [13]:
# getting a single element by its index label
series_with_index['b']

7

In [14]:
# getting multiple elements by passing a list of index labels;
# the result follows the order of the list, not the order of the series
series_with_index[["a", "c", "d"]]

a   -5
c    3
d    4
dtype: int64

In [15]:
# assigning to an element through its index label
# (this modifies series_with_index itself; later cells see the new value)
series_with_index['d'] = 6
series_with_index

d    6
b    7
a   -5
c    3
dtype: int64

In [16]:
# multiplying each element by 2; the result is a new series and
# series_with_index itself is left unchanged
series_with_index * 2

d    12
b    14
a   -10
c     6
dtype: int64

In [17]:
# checking whether a label is present in the index of the series
'b' in series_with_index

True

In [18]:
# building a series from a dict: keys become the index labels, values the data
states_dict = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)

states_series = pd.Series(data=states_dict)
states_series

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [19]:
# naming the data column and the index; both names are set on the existing
# series object and show up in its display
states_series.name = "population"
states_series.index.name = "state"
states_series

state
Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
Name: population, dtype: int64

<h2>Dataframe</h2>

In [20]:
# creating df from a dict of equal-length lists: each key becomes a column.
# The dict gets its own name so it does not overwrite `data`, the country
# dataframe loaded at the top of the notebook.
states_data = {
    "state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

states_df = pd.DataFrame(data=states_data)
states_df

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [21]:
# getting the column labels of the df, returned as an Index object
states_df.columns

Index(['state', 'year', 'pop'], dtype='object')

In [22]:
# selecting a single column by name returns a Series.
# Bracket access is needed for this column: attribute access (states_df.pop)
# would resolve to the DataFrame.pop method rather than the "pop" column.
states_df["pop"]

0    1.5
1    1.7
2    3.6
3    2.4
4    2.9
5    3.2
Name: pop, dtype: float64

In [23]:
# swapping rows and columns of the dataframe
states_df.transpose()

Unnamed: 0,0,1,2,3,4,5
state,Ohio,Ohio,Ohio,Nevada,Nevada,Nevada
year,2000,2001,2002,2001,2002,2003
pop,1.5,1.7,3.6,2.4,2.9,3.2


<h2>Index Objects</h2>

In [24]:
# the index of a series is itself an Index object; it carries the name "state" set earlier
states_series.index

Index(['Ohio', 'Texas', 'Oregon', 'Utah'], dtype='object', name='state')

In [26]:
# creating an index object directly from an array of the integers 0..4
pd.Index(data=np.arange(5))

Int64Index([0, 1, 2, 3, 4], dtype='int64')

In [27]:
# index objects behave like fixed-size sets, so membership can be tested with `in`
"Texas" in states_series.index

True

<h2>Essential Functionality</h2>

In [28]:
# redisplaying states_df as the starting point for the examples in this section
states_df

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [31]:
# removing the rows labelled 1 and 4; a new dataframe is returned
states_df.drop(index=[1, 4])

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
2,Ohio,2002,3.6
3,Nevada,2001,2.4
5,Nevada,2003,3.2


In [30]:
# removing the "year" column; a new dataframe is returned
states_df.drop(columns="year")

Unnamed: 0,state,pop
0,Ohio,1.5
1,Ohio,1.7
2,Ohio,3.6
3,Nevada,2.4
4,Nevada,2.9
5,Nevada,3.2


<i>Many methods that change the size or shape of a dataframe, such as drop, can modify the object in place (by passing inplace=True) instead of returning a new object.</i>

In [32]:
# in-place variant, intentionally left commented out: running it would remove the
# "year" column from states_df itself instead of returning a modified copy
#states_df.drop("year", axis=1, inplace=True)

<h2>Indexing, Selection & Filtering</h2>

In [33]:
# redisplaying series_with_index as the starting point for the examples in this section
series_with_index

d    6
b    7
a   -5
c    3
dtype: int64

In [34]:
# selecting by integer position: use .iloc explicitly.
# Plain series_with_index[1] on a string-labelled series relies on a positional
# fallback that pandas has deprecated (FutureWarning since 2.1); in a future
# version an integer key will be treated as a label instead.
series_with_index.iloc[1]

7

In [35]:
# selecting a single element using its index label
series_with_index["b"]

7

In [36]:
# positional slice: the first three elements of the series
series_with_index.iloc[:3]

d    6
b    7
a   -5
dtype: int64

In [37]:
# 4x4 dataframe of the integers 0..15 with state names as row labels
row_labels = ["Ohio", "Colorado", "Utah", "New York"]
column_labels = ["one", "two", "three", "four"]
state_number_df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=row_labels,
    columns=column_labels,
)
state_number_df

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [38]:
# picking out one column by its name; the result is a series
state_number_df["three"]

Ohio         2
Colorado     6
Utah        10
New York    14
Name: three, dtype: int32

In [39]:
# selecting multiple columns by passing a list of column names; the result is a dataframe
state_number_df[["two", "four"]]

Unnamed: 0,two,four
Ohio,1,3
Colorado,5,7
Utah,9,11
New York,13,15


In [41]:
# selecting a row by its index label with .loc; the row comes back as a series
state_number_df.loc["Utah"]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int32

In [42]:
# selecting a row by its integer position with .iloc (position 2 is the "Utah" row)
state_number_df.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int32

In [43]:
# selecting multiple rows with a positional slice: the first three rows
state_number_df.iloc[:3]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11


In [46]:
# selecting rows and columns by position: the first row, restricted to the first two columns
state_number_df.iloc[0, :2]

one    0
two    1
Name: Ohio, dtype: int32

In [49]:
# selecting rows and columns by label: a list of row labels and a list of column labels
state_number_df.loc[["Ohio", "Colorado"], ["three", "four"]]

Unnamed: 0,three,four
Ohio,2,3
Colorado,6,7


In [50]:
# negative positional slices: the last two rows and the last two columns
state_number_df.iloc[-2:, -2:]

Unnamed: 0,three,four
Utah,10,11
New York,14,15
