### Import Libraries

In [30]:
import pandas as pd
import matplotlib.colorbar
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
import plotly.graph_objects as go
import plotly.express as px
import bar_chart_race as bcr

In [31]:
import os
import glob
import datetime

In [32]:
from utility_India_AQI import concat_and_sort, out_filename_india_aqi_city


### Data Analysis and Preparation

> The Datasets were sourced from: [National Air Quality Index - India](https://airquality.cpcb.gov.in/AQI_India_Iframe/)

In [33]:
dir_parent = os.path.dirname(os.getcwd())
dir_data = os.path.join(dir_parent, 'data/')
dir_animations = os.path.join(dir_parent, 'animations/')

dir_hour12 = os.path.join(dir_data, 'hour_12/')
dir_hour23 = os.path.join(dir_data, 'hour_23/')

#### Prep Year/Month/Date/Hour to read

In [34]:
file_names = []
for subdir in os.listdir(dir_data):
    if subdir.startswith('hour'):
        subdir_path = os.path.join(dir_data, subdir)
        for filename in os.listdir(subdir_path):
            file_names.append(os.path.join(subdir, filename))

In [35]:
filename_part1 = 'AQI_all_station'
filename_part_last = '_00_00Z.xlsx'

### Prepare Final DataFrame

##### Process DataFrames

In [36]:
dfs = []
dates = []
for file_name in file_names:

    # extract year, month, day, and hour from file name
    year  = int(file_name[23:27])
    month = int(file_name[28:30])
    day   = int(file_name[31:33])
    hour  = int(file_name[34:36])
    dates.append(datetime.datetime(year, month, day))
    
    # read the file into a dataframe and append it to the list
    df = pd.read_excel(os.path.join(dir_data, file_name), skiprows=3, sheet_name='Sheet1')
    
    df['State'] = df['State'].ffill()
    df['City']  = df['City'].ffill()
    
    df['Year']  = df['Month'] = df['Day'] = df['Hour'] = None
    
    df.loc[:, 'Year']  = int(year)
    df.loc[:, 'Month'] = int(month)
    df.loc[:, 'Day']   = int(day)
    df.loc[:, 'Hour']  = int(hour)

    dfs.append(df)


In [37]:
df_city_aqi = concat_and_sort(dfs, 'City')  #calling the utility function

##### Define AQI Metrics

In [38]:
AQI_MEDIAN = 'AQI_median'
AQI_MEAN   = 'AQI_mean'
AQI_MAX    = 'AQI_max'
AQI_MIN    = 'AQI_min'

In [39]:
AQI_METRIC = AQI_MEDIAN # change this to change the metric

##### Prepare DataFrame compatible for _bar_chart_race_

In [40]:
# Subset the DataFrame to include only relevant columns
df_subset = df_city_aqi[['City', 'Date', AQI_METRIC]]

# Convert 'AQI' column to numeric
df_subset[AQI_METRIC] = pd.to_numeric(df_subset[AQI_METRIC], errors='coerce')

# Drop rows with missing values
df_subset = df_subset.dropna(subset=['Date', AQI_METRIC])

# Sort the DataFrame by 'Date' and 'AQI'
df_subset = df_subset.sort_values(['Date', AQI_METRIC], ascending=[True, False])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_subset[AQI_METRIC] = pd.to_numeric(df_subset[AQI_METRIC], errors='coerce')


In [41]:
df_bcr = df_subset.pivot_table(values = AQI_METRIC,index = ['Date'], columns = 'City')

In [42]:
# fill NaN values with median AQI value for that state on that date
df_bcr = df_bcr.apply(lambda x: x.fillna(x.median()), axis=0)

### Create Bar-Chart Race

#### Select Date Range

In [43]:
# Get the earliest and latest dates
DEFAULT_START_DATE = min(df_bcr.index)
DEFAULT_END_DATE = max(df_bcr.index)

print(DEFAULT_START_DATE)
print(DEFAULT_END_DATE)

2023-07-01 23:00:00
2023-11-11 23:00:00


** Configure the following date variables to chose custom date-range. A string in 'YYYY-MM-DD' format would suffice.

In [44]:
start_date_str = DEFAULT_START_DATE 
end_date_str   = DEFAULT_END_DATE

In [45]:
start_date = pd.to_datetime(start_date_str)
end_date   = pd.to_datetime(end_date_str)

df_bcr_filtered = df_bcr.loc[start_date:end_date]


> [Choose color scheme](https://plotly.com/python/discrete-color/)

#### Bar-Chart Race parameters

In [48]:
ORIENTATION        = 'h'
SORT               = 'desc'
N_BARS             = 10 #8 #10
STEPS_PER_PERIOD   = 10
PERIOD_LENGTH      = 1000 #800 #1000 #default: 500
INTERPOLATE_PERIOD = False
LABEL_BARS         = True
BAR_SIZE           = .50 #defaullt: .95
PERIOD_LABEL       = True # {'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'}
CMAP               = 'Dark2' #default: 'dark12'   ['dark12', 'dark24', 'dark36', 'Pastel', 'Pastel1', 'Pastel2', 'Set1', 'Set2', 'Set3']

TITLE = 'Median AQI of Indian Cities by Date and Time'

DPI = 144 #300 #default: 144

EXTENSION = '.mp4' #'.gif', '.mkv'

#### Generate Animation and Save File

In [49]:
bcr.bar_chart_race(df=df_bcr_filtered, 
                    n_bars=N_BARS, 
                    sort=SORT,
                    title=TITLE,
                    filename=dir_animations + out_filename_india_aqi_city(extension=EXTENSION),
                    orientation=ORIENTATION,
                    steps_per_period=STEPS_PER_PERIOD,
                    period_length=PERIOD_LENGTH,
                    interpolate_period=INTERPOLATE_PERIOD,
                    label_bars=LABEL_BARS,
                    bar_size=BAR_SIZE,
                    period_label=PERIOD_LABEL,
                    cmap=CMAP,
                    dpi=DPI)


  df_values.iloc[:, 0] = df_values.iloc[:, 0].fillna(method='ffill')
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canvas.print_figure(io.BytesIO())
  fig.canva