# Pandas: DataFrame from Python dictionary

In [113]:
from datetime import date
import pandas as pd

## Create DataFrame from Dictionary

In [114]:
# Column-oriented input: each key becomes a column label and each list holds
# that column's values, one entry per person.
data = {
    "Name": ["Anna", "Peter", "Linda", "Mark"],
    "Date of Birth": ["15.04.2000", "23.01.1970", "22.08.1984", "31.08.2003"],
    "salery month": [40000, 55000, 47500, 35800],
}

# Build the frame from the dictionary; rows receive a default integer index.
df = pd.DataFrame(data=data)
df  # last expression of the cell, so the notebook renders the table

Unnamed: 0,Name,Date of Birth,salery month
0,Anna,15.04.2000,40000
1,Peter,23.01.1970,55000
2,Linda,22.08.1984,47500
3,Mark,31.08.2003,35800


In [115]:
# Print a summary of the frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Name           4 non-null      object
 1   Date of Birth  4 non-null      object
 2   salery month   4 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes


## Modify column names

In [116]:
# Normalise the column labels to snake_case: spaces become underscores and
# everything is lower-cased. The vectorised `.str` accessor on the Index
# produces a new Index directly, so no lazy `map` iterator is assigned.
df.columns = df.columns.str.replace(" ", "_", regex=False).str.lower()
df.columns

Index(['name', 'date_of_birth', 'salery_month'], dtype='object')

In [117]:
# Render the frame again to inspect the current column labels.
df

Unnamed: 0,name,date_of_birth,salery_month
0,Anna,15.04.2000,40000
1,Peter,23.01.1970,55000
2,Linda,22.08.1984,47500
3,Mark,31.08.2003,35800


## Convert date field type

In [118]:
# Parse the day.month.year strings into real datetimes. With errors="coerce",
# any value that does not match the format becomes NaT instead of raising.
df["date_of_birth"] = pd.to_datetime(df["date_of_birth"], format="%d.%m.%Y", errors="coerce")
# Drop only the rows whose date could not be parsed; a bare dropna() would
# also discard rows that have a missing value in any unrelated column.
df = df.dropna(subset=["date_of_birth"])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   name           4 non-null      object        
 1   date_of_birth  4 non-null      datetime64[ns]
 2   salery_month   4 non-null      int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 228.0+ bytes


In [101]:
# Render the frame to inspect the values of date_of_birth after conversion.
df

Unnamed: 0,name,date_of_birth,salery_month
0,Anna,2000-04-15,40000
1,Peter,1970-01-23,55000
2,Linda,1984-08-22,47500
3,Mark,2003-08-31,35800


## Read columns and rows

In [102]:
df["name"]  # Read a single column by its label

0     Anna
1    Peter
2    Linda
3     Mark
Name: name, dtype: object

In [103]:
# Inspect which Python type a single selected column has.
type(df["name"])

pandas.core.series.Series

In [104]:
# Render the whole frame for reference before selecting individual rows.
df

Unnamed: 0,name,date_of_birth,salery_month
0,Anna,2000-04-15,40000
1,Peter,1970-01-23,55000
2,Linda,1984-08-22,47500
3,Mark,2003-08-31,35800


In [105]:
# Read the first row by integer position (position 0).
df.iloc[0]

name                            Anna
date_of_birth    2000-04-15 00:00:00
salery_month                   40000
Name: 0, dtype: object

In [106]:
type(df.iloc[2])  # Inspect the Python type of a single row read by position

pandas.core.series.Series

## Add columns

In [109]:
# Age in completed years: the difference in calendar years, minus one for
# anyone whose birthday has not yet occurred in the current year. A plain
# year difference would overstate the age for those people.
today = date.today()
birth = df["date_of_birth"]
birthday_pending = (birth.dt.month > today.month) | (
    (birth.dt.month == today.month) & (birth.dt.day > today.day)
)
df["age"] = (today.year - birth.dt.year - birthday_pending.astype(int)).astype(int)
# Despite the column name, this holds the weekday name of the birth date.
df["day_of_birth"] = birth.dt.day_name()

In [110]:
# Render the frame to inspect the newly added age and day_of_birth columns.
df

Unnamed: 0,name,date_of_birth,salery_month,age,day_of_birth
0,Anna,2000-04-15,40000,24,Saturday
1,Peter,1970-01-23,55000,54,Friday
2,Linda,1984-08-22,47500,40,Wednesday
3,Mark,2003-08-31,35800,21,Sunday


## Reorder columns

In [111]:
# Rearrange the columns by selecting every row and listing the labels in the
# desired order: name, salary, age, then the two birth-date columns.
df = df.loc[:, ["name", "salery_month", "age", "date_of_birth", "day_of_birth"]]
df  # show the reordered frame

Unnamed: 0,name,salery_month,age,date_of_birth,day_of_birth
0,Anna,40000,24,2000-04-15,Saturday
1,Peter,55000,54,1970-01-23,Friday
2,Linda,47500,40,1984-08-22,Wednesday
3,Mark,35800,21,2003-08-31,Sunday
