<a href="https://colab.research.google.com/github/ambika1us/Rainfall_Trends_in_India_Analysis/blob/main/Rainfall_Trends_in_India_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

In [2]:
data=pd.read_csv('/content/drive/MyDrive/Rainfall_Trends_in_India_Analysis/rainfall_area-wt_India_1901-2015.csv')

In [3]:
data.head()

Unnamed: 0,REGION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec
0,INDIA,1901,34.7,37.7,18.0,39.3,50.8,113.4,242.2,272.9,124.4,52.7,38.0,8.3,1032.3,72.4,108.1,752.8,99.0
1,INDIA,1902,7.4,4.3,19.0,43.5,48.3,108.8,284.0,199.7,201.5,61.5,27.9,24.4,1030.2,11.7,110.8,794.0,113.8
2,INDIA,1903,17.0,8.3,31.3,17.1,59.5,118.3,297.0,270.4,199.1,117.9,36.9,17.7,1190.5,25.3,107.9,884.8,172.5
3,INDIA,1904,14.4,9.6,31.8,33.1,72.4,164.8,261.0,206.4,129.6,69.0,11.2,16.3,1019.8,24.0,137.4,761.8,96.6
4,INDIA,1905,25.3,20.9,42.7,33.7,55.7,93.3,252.8,200.8,178.4,51.4,9.7,10.5,975.3,46.2,132.2,725.4,71.6


In [4]:
import plotly.express as px

In [5]:
import plotly.graph_objects as go

In [6]:
#Analze trends in annual rainfall over the time
rainfall=data[['YEAR','ANNUAL']]

In [7]:
rainfall.head()

Unnamed: 0,YEAR,ANNUAL
0,1901,1032.3
1,1902,1030.2
2,1903,1190.5
3,1904,1019.8
4,1905,975.3


In [8]:
fig_annual=go.Figure()
fig_annual.add_trace(go.Scatter(
    x=rainfall['YEAR'],
    y=rainfall['ANNUAL'],
    mode='lines',
    name='Annual Rainfall',
    line=dict(color='blue',width=2),
    opacity=0.7
))

In [9]:
fig_annual.add_trace(go.Scatter(
    x=rainfall['YEAR'],
    y=[rainfall['ANNUAL'].mean()]*len(rainfall),
    mode='lines',
    name='Mean Rainfall',
    line=dict(color='red', dash='dash')
))

In [10]:
fig_annual.update_layout(
    title='Trend in Annual Rainfall in India',
    xaxis_title='Year',
    yaxis_title='Rainfall (in MM)',
    template='plotly_white',
    legend=dict(title='Legend'),
    height=500
)

fig_annual.show()

In [11]:
#Identify months with the highest and lowest rainfall on average
months=['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']
month_avg=data[months].mean()

higest_rainfall_month=month_avg.idxmax()
lowest_rainfall_month=month_avg.idxmin()
print(higest_rainfall_month, 'Month Receive Higest rainfall')
print(lowest_rainfall_month, 'Month Receive Lowest rainfall')

JUL Month Receive Higest rainfall
DEC Month Receive Lowest rainfall


In [12]:
fig_monthly=px.bar(
    x=month_avg.index,
    y=month_avg.values,
    labels={'x':'months','y':'Rainfall (in mm)'},
    title='Average Monthly Rainfall in India',
    text=month_avg.values
)

fig_monthly.show()

In [13]:
fig_monthly.add_hline(
    y=month_avg.mean(),
    line_dash='dash',
    line_color='red',
    annotation_text='Mean Rainfall',
    annotation_position='top right'
)

In [14]:
#Seasonal Rainfall Distribution
seasonal_columns=['Jan-Feb','Mar-May','Jun-Sep','Oct-Dec']
seasonal_avg=data[seasonal_columns].mean()

fig_seasonal=px.bar(
    x=seasonal_avg.index,
    y=seasonal_avg.values,
    labels={'x':'Season','y':'Rainfall (in mm)'},
    title='Seasonal Rainfall Distribution in India',
    text=seasonal_avg.values,
    color=seasonal_avg.values,
    color_continuous_scale=['gold','skyblue','green','orange']
)

In [15]:
fig_seasonal.update_traces(
    marker_line_color='black',
    marker_line_width=1
)

fig_seasonal.update_layout(
    template='plotly_white',
    height=500,
    coloraxis_colorbar=dict(title='mm')
)

fig_seasonal.update_layout(
    template='plotly_white',
    height=500
)

fig_seasonal.show()

In [16]:
#Calculating rolling average to assess climate change impact
data['10-Year Rolling Avg']=data['ANNUAL'].rolling(window=10).mean()

In [17]:
fig_climate_change=go.Figure()
fig_climate_change.add_trace(go.Scatter(
    x=data['YEAR'],
    y=data['ANNUAL'],
    mode='lines',
    name='Annual Rainfall',
    line=dict(color='blue',width=2),
    opacity=0.6
))

In [18]:
fig_climate_change.add_trace(go.Scatter(
    x=data['YEAR'],
    y=data['10-Year Rolling Avg'],
    mode='lines',
    name='10-Year Rolling Avg',
    line=dict(color='red',width=3)
))

In [19]:
fig_climate_change.update_layout(
    title=('Impact of Climate Change on Rainfall Patterns'),
    xaxis_title='Year',
    yaxis_title='Rainfall (in MM)',
    template='plotly_white',
    legend=dict(title='Legend'),
    height=500
)

fig_climate_change.show()

In [20]:
from scipy.stats import pearsonr

In [21]:
#Identify drought and extreme rainfall years
mean_rainfall=data['ANNUAL'].mean()
std_dev_rainfall=data['ANNUAL'].std()

drought_years=data[data['ANNUAL']<(mean_rainfall-105*std_dev_rainfall)]
extream_rainfall_years=data[data['ANNUAL']>(mean_rainfall+1.5*std_dev_rainfall)]

In [22]:
#Correlating Seasonal rainfall with annual rainfall totals

seasonal_columns=['Jan-Feb','Mar-May','Jun-Sep','Oct-Dec']
seasonal_correlations={
    season:pearsonr(data[season],data['ANNUAL'])[0] for season in seasonal_columns
}

In [23]:
#Displaying results for drought/extreme years and correlations

drought_year_summary=drought_years[['YEAR','ANNUAL']].reset_index(drop=True)

extreme_rainfall_years_summary=extream_rainfall_years[['YEAR','ANNUAL']].reset_index(drop=True)

print('\nSeasonal Correlations:')
for season,correlation in seasonal_correlations.items():
  print(f'{season}: {correlation}')

seasonal_correlation_summary=pd.DataFrame.from_dict(seasonal_correlations,orient='index',columns=['Correlation'])

drought_year_summary,extreme_rainfall_years_summary,seasonal_correlation_summary


Seasonal Correlations:
Jan-Feb: 0.22891293292947246
Mar-May: 0.313057400198277
Jun-Sep: 0.9300269697680887
Oct-Dec: 0.5316478472878181


(Empty DataFrame
 Columns: [YEAR, ANNUAL]
 Index: [],
    YEAR  ANNUAL
 0  1917  1480.3
 1  1933  1393.5
 2  1956  1386.2
 3  1959  1382.1
 4  1961  1403.0
 5  1988  1351.0
 6  1990  1400.6,
          Correlation
 Jan-Feb     0.228913
 Mar-May     0.313057
 Jun-Sep     0.930027
 Oct-Dec     0.531648)

In [24]:
from sklearn.ensemble import IsolationForest

In [25]:
#Detect anomalous rainfall years based on annual data

isolation_forest=IsolationForest(contamination=0.05,random_state=42)
data['Annual_Anomaly']=isolation_forest.fit_predict(data[['ANNUAL']])

In [26]:
#Identify anomalies in annual rainfall

annual_anomalies=data[data['Annual_Anomaly']==-1]

In [27]:
#Detect anomalous months based on monthly data

monthly_data=data[['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']]
monthly_anomalies=isolation_forest.fit_predict(monthly_data)

In [28]:
#Add anomaly detection results for months

data['Monthly_Anomaly']=monthly_anomalies

monthly_anomalies_df=data[data['Monthly_Anomaly']==-1][['YEAR']+months]

In [29]:
fig_annual_anomalies= go.Figure()

In [30]:
fig_annual_anomalies.add_trace(go.Scatter(
    x=data['YEAR'],
    y=data['ANNUAL'],
    mode='lines',
    name='Annual Rainfall',
    line=dict(color='blue', width=2),
    opacity=0.6
))

In [31]:
fig_annual_anomalies.add_trace(go.Scatter(
    x=annual_anomalies['YEAR'],
    y=annual_anomalies['ANNUAL'],
    mode='markers',
    name='Anomalous Years',
    marker=dict(color='red',size=8,symbol='circle')
))

In [32]:
fig_annual_anomalies.add_hline(
    y=data['ANNUAL'].mean(),
    line_dash='dash',
    line_color='green',
    annotation_text='Mean Rainfall',
    annotation_position='bottom right'
)

In [33]:
fig_annual_anomalies.update_layout(
    title='Annual Rainfall Anomalies in India',
    xaxis_title='Year',
    yaxis_title=' Rainfall (in MM)',
    template='plotly_white',
    legend=dict(title='Legend'),
    height=500
)

fig_annual_anomalies.show()

In [34]:
#Preparing data for monthly anomalies

monthly_anomalies=[]

for column in months:
  for _,row in monthly_anomalies_df.iterrows():
    monthly_anomalies.append({'Year':row['YEAR'], 'Month':column,'Rainfall':row[column]})
    monthly_anomalies_df_long=pd.DataFrame(monthly_anomalies)


In [35]:
fig_monthly_anomalies=px.line(
    data,
    x='YEAR',
    y=months,
    labels={'Year':'Year', 'value':'Rainfall (in MM)','variable':'months'},
    title='Monthly Rainfall Anomalies in India',
    color_discrete_sequence=px.colors.qualitative.Dark2
)

In [36]:
fig_monthly_anomalies.add_trace(go.Scatter(
    x=monthly_anomalies_df_long['Year'],
    y=monthly_anomalies_df_long['Rainfall'],
    mode='markers',
    name='Anomalous Months',
    marker=dict(color='red', size=5,symbol='circle')
))

In [37]:
fig_monthly_anomalies.update_layout(
    template='plotly_white',
    legend=dict(title='Legend'),
    height=500
)

fig_monthly_anomalies.show()

In [38]:
#Correlation analysis between monsoon (Jun-Sep) rainfall and other seasons

seasonal_columns=['Jan-Feb','Mar-May','Jun-Sep','Oct-Dec']
monsoon_column='Jun-Sep'
relationship={}

for season in seasonal_columns:
  if season !=monsoon_column:
    corr,_=pearsonr(data[monsoon_column], data[season])
    relationship[season]=corr

In [39]:
correlation_data=pd.DataFrame({
    'season':list(relationship.keys()),
    'correlation coefficient':list(relationship.values())
})

In [40]:
fig=px.bar(
    correlation_data,
    x='season',
    y='correlation coefficient',
    title='Correlation Between Monsoon (Jun-Sep) Rainfall and other Season',
    labels={'Season':'Season','Correlation Coefficient':'Correlation Coefficient'},
    text='correlation coefficient',
    color='correlation coefficient',
    color_continuous_scale='Blues'
)

In [41]:
fig.add_hline(
    y=0,
    line_dash='dash',
    line_color='red',
    annotation_text='No Correlation',
    annotation_position='bottom left'
)

In [42]:
fig.update_traces(marker_line_color='black',marker_line_width=1,texttemplate='%{text:2f}')
fig.update_layout(
    template='plotly_white',
    height=500
)

fig.show()

In [43]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [44]:
#Prepare data for clustering

rainfall_features=data[['Jan-Feb','Mar-May','Jun-Sep','Oct-Dec','ANNUAL']]
scaler=StandardScaler()
scaled_features=scaler.fit_transform(rainfall_features)

In [45]:
#Perform K-Means clustering
KMeans=KMeans(n_clusters=3,random_state=42)
data['Rainfall_Cluster']=KMeans.fit_predict(scaled_features)

In [46]:
#MAP cluster labels to categories

cluster_labels={0:'Dry',1:'Normal',2:'Wet'}
data['Rainfall_Category']=data['Rainfall_Cluster'].map(cluster_labels)

In [47]:
fig=px.scatter(
    data,
    x='YEAR',
    y='ANNUAL',
    color='Rainfall_Category',
    title='Clustering of Years Based on Rainfall Patterns',
    labels={'YEAR':'Year','ANNUAL':'Annual Rainfall','Rainfall_Category':'Rainfall Category'},
    color_discrete_sequence=px.colors.qualitative.Dark2,
    hover_data={'Rainfall_Cluster':True,'Rainfall_Category':True}
)

In [48]:
fig.update_layout(
    template='plotly_white',
    legend_title='Rainfall Category',
    height=500
)

fig.show()

In [49]:
data['DATE']=pd.to_datetime(data['YEAR'],format='%Y')
annual_rainfall_ts=data.set_index('DATE')['ANNUAL']

In [51]:
# use: pip install prophet

In [52]:
from prophet import Prophet

In [53]:
#Prepare the data for prophet

prophet_data=annual_rainfall_ts.reset_index()
prophet_data.columns=['ds','y']

In [54]:
from prophet.plot import plot_plotly,plot_components_plotly

In [55]:
prophet_model=Prophet()
prophet_model.fit(prophet_data)

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm6hlvhh2/7_8zux1m.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm6hlvhh2/p4jdfp1q.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=36536', 'data', 'file=/tmp/tmpm6hlvhh2/7_8zux1m.json', 'init=/tmp/tmpm6hlvhh2/p4jdfp1q.json', 'output', 'file=/tmp/tmpm6hlvhh2/prophet_model8hyxfxd_/prophet_model-20241126041652.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
04:16:52 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
04:16:52 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7929c07b7370>

In [57]:
#Create a future dataframe for the next 20 years
future=prophet_model.make_future_dataframe(periods=20,freq='YE')
forecast=prophet_model.predict(future)

In [59]:
fig_forecast=plot_plotly(prophet_model,forecast)


In [60]:
fig_forecast.update_layout(
    title='Annual Rainfall Forecast using Prophet',
    xaxis_title='Year',
    yaxis_title='Rainfall (in MM)',
    template='plotly_white',
    height=500
)

fig_forecast.show()