In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import calendar
import datetime as dt
import plotly.express as px

# Unemployment analysis: load the CSV, normalise its columns, derive month
# fields, print summary statistics, and plot the mean unemployment rate per
# region.

# Load the data from 'data.csv'
df = pd.read_csv('data.csv')

# Display the first few rows of the DataFrame
print(df.head())

# Check for null values in the DataFrame (reported only; nothing is dropped)
print(df.isnull().sum())

# Rename columns for clarity.
# The rename is positional: the CSV's first column is headed "Region" but
# holds state names, while the seventh column (read by pandas as "Region.1")
# holds the actual region, so they become 'States' and 'Region' respectively.
# A CSV with a different number of columns makes this assignment raise.
# NOTE(review): in the sample rows 'longitude' looks like a latitude value and
# vice versa -- confirm against the data source before using them for mapping.
df.columns = [
    'States',
    'Date',
    'Frequency',
    'Estimated Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
    'Region',
    'longitude',
    'latitude',
]

# Convert the 'Date' column to datetime format (source dates are day-first)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

# 'Frequency' and 'Region' are low-cardinality labels; store them as categoricals
df['Frequency'] = df['Frequency'].astype('category')
df['Region'] = df['Region'].astype('category')

# Month number (1-12) taken straight from the parsed date. The explicit int64
# cast keeps the dtype the script has always produced and fails loudly if any
# date could not be parsed.
df['MonthNumber'] = df['Date'].dt.month.astype('int64')

# Abbreviated month name ('Jan', 'Feb', ...) looked up from the month number
month_abbreviations = dict(enumerate(calendar.month_abbr))
df['MonthName'] = df['MonthNumber'].map(month_abbreviations)

numeric_columns = [
    'Estimated Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
]

# Display summary statistics for numerical columns
print(df[numeric_columns].describe().T.round(2))

# Group by 'Region' and find mean values for the numerical columns.
# 'Region' is categorical, so `observed` is passed explicitly: the result is
# the same for categories that all occur in the data, and it avoids the
# FutureWarning newer pandas emits when the argument is left to its default.
regionStats = (
    df.groupby('Region', observed=True)[numeric_columns]
    .mean()
    .reset_index()
    .round(2)  # Round the values to 2 decimal points
)

# Create a bar graph using Plotly Express
fig = px.bar(regionStats, x='Region', y='Estimated Unemployment Rate', title='Unemployment Rate by Region')
fig.update_xaxes(title_text='Region')
fig.update_yaxes(title_text='Estimated Unemployment Rate')

# Show the plot
fig.show()


           Region         Date  Frequency   Estimated Unemployment Rate (%)  \
0  Andhra Pradesh   31-01-2020          M                              5.48   
1  Andhra Pradesh   29-02-2020          M                              5.83   
2  Andhra Pradesh   31-03-2020          M                              5.79   
3  Andhra Pradesh   30-04-2020          M                             20.51   
4  Andhra Pradesh   31-05-2020          M                             17.43   

    Estimated Employed   Estimated Labour Participation Rate (%) Region.1  \
0             16635535                                     41.02    South   
1             16545652                                     40.90    South   
2             15881197                                     39.18    South   
3             11336911                                     33.10    South   
4             12988845                                     36.46    South   

   longitude  latitude  
0    15.9129     79.74  
1    15.9129