# Pandas Refresher & Missing Values Treatment

### Loading Libraries

In [3]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import plotly.io as pio
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# OS
import os

from pathlib import Path

# Notebook Optimizer
from tqdm.auto import tqdm

# Itertools
from itertools import cycle

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Optional one-off install (presumably needed to export Plotly figures as static images — confirm).
# Prefer `%pip` over `!pip` so the package lands in the kernel's own environment:
# %pip install -U kaleido

In [7]:
# Make "plotly_white" the default template for every Plotly figure created from here on
pio.templates.default = "plotly_white"

In [10]:
# Uncomment to pick up edits to imported modules without restarting the kernel.
# The extension has to be loaded before `%autoreload` can be used:
# %load_ext autoreload
# %autoreload 2

In [11]:
# Enable tqdm's pandas integration (adds `progress_apply` and related methods)
tqdm.pandas()

# Fix NumPy's global seed so random draws repeat on every "Restart & Run All".
# Calling np.random.seed() with no argument reseeds from OS entropy instead,
# which makes each run produce different numbers.
SEED = 42
np.random.seed(SEED)

In [12]:
# Switch for persisting figures to disk; the output folder is only
# created (including missing parents) when the switch is turned on.
SAVE_FIGURES = False
if SAVE_FIGURES:
    Path("imgs/chap_02").mkdir(parents=True, exist_ok=True)

## Loading Data

### Pandas Datetime Operations, Indexing, & Slicing – A refresher

In [14]:
# Workbook hosted on the UCI ML repository. The first spreadsheet row is
# skipped, so the row after it supplies the column names.
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00247/data_akbilgic.xlsx"
df = pd.read_excel(DATA_URL, skiprows=1)

df.head()


Unknown extension is not supported and will be removed



Unnamed: 0,date,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
0,2009-01-05,0.035754,0.038376,-0.004679,0.002193,0.003894,0.0,0.03119,0.012698,0.028524
1,2009-01-06,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773
2,2009-01-07,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015
3,2009-01-08,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424
4,2009-01-09,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802


### Converting date columns to `pd.Timestamp` / `DatetimeIndex`

In [15]:
# There is no month 13, so the leading "13" is read as the day (13 April 1987)
pd.to_datetime("13-4-1987").strftime("%d, %B %Y")





'13, April 1987'

In [16]:
# Ambiguous string: with default settings the first number is read as the month (1 April 1987)
pd.to_datetime("4-1-1987").strftime("%d, %B %Y")

'01, April 1987'

In [17]:
# dayfirst=True makes the first number the day instead (4 January 1987)
pd.to_datetime("4-1-1987", dayfirst=True).strftime("%d, %B %Y")

'04, January 1987'

In [18]:
# An explicit format string handles the non-standard "|" separator and removes any ambiguity
pd.to_datetime("4|1|1987", format="%d|%m|%Y").strftime("%d, %B %Y")

'04, January 1987'

In [19]:
# Make sure `date` holds datetimes; yearfirst=True reads ambiguous strings year-first
date_as_datetime = pd.to_datetime(df["date"], yearfirst=True)
df["date"] = date_as_datetime
df["date"].dtype

dtype('<M8[ns]')

In [20]:
# Earliest and latest dates covered by the data
date_col = df["date"]
date_col.min(), date_col.max()

(Timestamp('2009-01-05 00:00:00'), Timestamp('2011-02-22 00:00:00'))

#### The `.dt` Accessor and `datetime` Properties

In [21]:
# Preview the first rows again after the datetime conversion
df.head()

Unnamed: 0,date,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
0,2009-01-05,0.035754,0.038376,-0.004679,0.002193,0.003894,0.0,0.03119,0.012698,0.028524
1,2009-01-06,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773
2,2009-01-07,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015
3,2009-01-08,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424
4,2009-01-09,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802


In [22]:
# Collect the calendar fields of the first row via the `.dt` accessor,
# then print them as one "label: value" line each.
dates = df["date"]
first_row_fields = {
    "Date": dates.iloc[0],
    "Day of year": dates.dt.day_of_year.iloc[0],
    "Day of week": dates.dt.dayofweek.iloc[0],
    "Month": dates.dt.month.iloc[0],
    "Month Name": dates.dt.month_name().iloc[0],
    "Quarter": dates.dt.quarter.iloc[0],
    "Year": dates.dt.year.iloc[0],
    "ISO Week": dates.dt.isocalendar().week.iloc[0],
}
report = "\n".join(f"{label}: {value}" for label, value in first_row_fields.items())
# Leading and trailing newlines reproduce the blank lines around the report
print(f"\n{report}\n")


Date: 2009-01-05 00:00:00
Day of year: 5
Day of week: 0
Month: 1
Month Name: January
Quarter: 1
Year: 2009
ISO Week: 2



#### Slicing and Indexing