In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns   
import calendar

In [None]:
import os
# Print the full path of every file found under /kaggle/input.
for folder, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))








In [None]:
import plotly.express as px


In [None]:
# Load the unemployment CSV from the Kaggle input directory into a DataFrame.
csv_path = "/kaggle/input/unemployment-analysis/Unemployment_Rate_upto_11_2020.csv"
unemp_rate = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Display the freshly loaded DataFrame as the cell output.
unemp_rate

In [None]:
# Preview the first rows of the raw data.
unemp_rate.head()

In [None]:
# Summary statistics for the columns selected by pandas' default `describe()`.
unemp_rate.describe()

In [None]:
# Print column dtypes and non-null counts.
unemp_rate.info()

In [None]:
# Count missing values in each column.
unemp_rate.isnull().sum()

In [None]:
# Replace the CSV headers with short names, matched by position, so the frame
# must have exactly nine columns here.
# EUR is plotted below as the unemployment rate; EE / ELPR are presumably
# "estimated employed" and a labour participation rate -- TODO confirm against
# the original CSV headers.
unemp_rate.columns = [
    "states",
    "date",
    "frequency",
    "EUR",
    "EE",
    "ELPR",
    "Region",
    "longitude",
    "lattitude",
]

In [None]:
# Preview the frame again to check the renamed columns.
unemp_rate.head()

In [None]:
# Distinct values present in the "states" column.
unemp_rate["states"].unique()

In [None]:
# Distinct values present in the "Region" column.
unemp_rate["Region"].unique()

In [None]:
# Number of rows recorded for each Region.
unemp_rate.groupby("Region").size()

In [None]:
## Adding month columns derived from the date

In [None]:
# Parse the date strings (day comes first) into datetimes.
unemp_rate['date'] = pd.to_datetime(unemp_rate['date'], dayfirst=True)

# Store the two text columns as categoricals.
unemp_rate['frequency'] = unemp_rate['frequency'].astype('category')
unemp_rate['Region'] = unemp_rate['Region'].astype('category')

# Derive the month number and its abbreviated name straight from the parsed
# dates, so no temporary 'month' column has to be created and dropped.
month_numbers = unemp_rate['date'].dt.month.astype('int64')
unemp_rate['month_int'] = month_numbers
unemp_rate['month_name'] = month_numbers.map(lambda m: calendar.month_abbr[m])

# Show the enriched frame as the cell output.
unemp_rate

In [None]:
# Mean of each indicator per region, rounded to two decimals.
# 'Region' was cast to a categorical earlier, so `observed=True` is passed
# explicitly: grouping on a categorical without it emits a FutureWarning in
# recent pandas, and its default differs between pandas versions.
region_stats = (
    unemp_rate
    .groupby('Region', observed=True)[['EUR', 'EE', 'ELPR']]
    .mean()
    .round(2)
    .reset_index()
)
region_stats

## Correlation heat map of the indicators and coordinates

In [None]:
# Pairwise correlation (pandas' default method) between the three indicators
# and the two coordinate columns.
corr_columns = ['EUR','EE','ELPR','longitude','lattitude']
heat_map = unemp_rate[corr_columns].corr()

# Draw the annotated correlation matrix on a new 10x6 figure.
plt.figure(figsize=(10,6))
sns.set_context('notebook',font_scale=1)
sns.heatmap(heat_map, annot=True,cmap='autumn')

### Basic statistics

In [None]:
# Summary statistics for the three indicator columns only.
# The subset lives in its own variable: assigning it as an attribute on the
# DataFrame (`unemp_rate.stats = ...`) does not create a column, triggers a
# pandas UserWarning, and left the subset unused so that `describe()` ran on
# the whole frame instead.
unemp_stats = unemp_rate[['EUR','EE','ELPR']]

# Transpose so each indicator is a row, then round to two decimals.
round(unemp_stats.describe().T, 2)

### Exploratory data analysis


In [None]:
# Box plot of the unemployment rate (EUR) per state, one colour per state.
figure = px.box(
    unemp_rate,
    x='states',
    y='EUR',
    color='states',
    title='the unemployment rate in india',
    template='plotly',
)
# Order the states on the x axis by descending total; the returned figure is
# the cell output.
figure.update_layout(xaxis=dict(categoryorder='total descending'))


In [None]:
## Unemployment rate distribution across the regions of India (histogram)

In [None]:
# Histogram of the unemployment rate (EUR), coloured by region.
# The column names are deliberately NOT reassigned here: by this point the
# frame also carries the derived month_int / month_name columns, so assigning
# the original nine names raises a length-mismatch ValueError on a fresh
# Restart & Run All, and would discard the month_name column that the animated
# chart later relies on.
plt.figure(figsize=(10,8))
plt.title("the unemployment rate")
sns.histplot(x='EUR', hue="Region", data=unemp_rate)


In [None]:
### Average unemployment rate by region

In [None]:
# Mean of each indicator per region.
# 'Region' is categorical at this point, so `observed=True` is passed
# explicitly to avoid pandas' FutureWarning and to get the same groups on
# every pandas version. `.mean()` already returns a DataFrame, so no extra
# pd.DataFrame(...) wrapper is needed.
region = (
    unemp_rate
    .groupby("Region", observed=True)[['EUR','EE','ELPR']]
    .mean()
    .reset_index()
)

# Bar chart of the mean unemployment rate (EUR), regions ordered by
# descending total; the returned figure is the cell output.
fig = px.bar(region, x="Region", y="EUR", color="Region", title="the average unemployment rate across regions")
fig.update_layout(xaxis={'categoryorder':'total descending'})


In [None]:
### Sunburst chart of unemployment rate by region and state

In [None]:
# Keep only the columns the sunburst needs.
unemployment_rate = unemp_rate.loc[:, ["states", "Region", "EUR"]]

# Two-level hierarchy following `path`: Region first, then states, with
# sectors sized from the EUR column.
fig = px.sunburst(
    unemployment_rate,
    path=['Region','states'],
    values='EUR',
    title= 'Unemployment rate in every State and in every Region',
    height=550,
)
fig.show()

## Impact of the COVID-19 lockdown in India
#### Estimated employed, unemployment rate and ELPR by region (scatter matrix)

In [None]:
# Pairwise scatter plots of the three indicators, one colour per region.
figure = px.scatter_matrix(
    unemp_rate,
    dimensions=['EUR','EE','ELPR'],
    color='Region',
    template='plotly',
)

In [None]:
# Render the scatter matrix built in the previous cell.
figure.show()

In [None]:
### Impact of the lockdown on the unemployment rate across regions and states, Jan 2020 to Oct 2020

In [None]:
# Animated bar chart: one frame per month_name, one bar per region, with the
# bar segments coloured by state.
fig = px.bar(
    unemp_rate,
    x='Region',
    y='EUR',
    color='states',
    animation_frame='month_name',
    title='Unemployment rate across regions from Jan. 2020 to Oct. 2020',
    height=700,
    template='plotly',
)

# Order the regions on the x axis by descending total.
fig.update_layout(xaxis=dict(categoryorder='total descending'))

# Slow the animation down by setting the frame duration on the first button of
# the first update menu (presumably the Play button; plotly expresses the
# duration in milliseconds).
first_button = fig.layout.updatemenus[0].buttons[0]
first_button.args[1]["frame"]["duration"] = 2000


In [None]:
# Display the animated bar chart configured in the previous cell.
fig