<a href="https://colab.research.google.com/github/Utpal-Mishra/Omdena-Malaysia-2021-Climate-Risk-Prediction-Part2/blob/main/OmdenaMalaysia2021ClimateRiskPredictionPart2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **IMPORT DRIVE**

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the CSV
# referenced later by its /content/drive/... path can be read.
from google.colab import drive
drive.mount('/content/drive')

# Status message printed once drive.mount() has returned.
print("Drive Mounted!!!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive Mounted!!!


### **IMPORT LIBRARIES**

In [None]:
# pandas loads and reshapes the indicator table; plotly.express and
# plotly.graph_objects are both used by the plotting helper defined further down.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Status message confirming the imports above succeeded.
print("Libraries Imported!!!")

Libraries Imported!!!


### **IMPORT DATA**

In [None]:
import datetime

path = "/content/drive/MyDrive/Omdena/Malaysia 2021/Task 1 Climate Risk Prediction/ClimateChange.csv"

# skiprows=1 drops the first line of the file, so the row of '#...' tags is
# read as the header; those tags are then mapped onto readable column names.
data = pd.read_csv(path, skiprows=1).rename(
    columns={
        "#country+name": "Country Name",
        "#country+code": "Country Code",
        "#date+year": "Date",
        "#indicator+name": "Indicator Name",
        "#indicator+code": "Indicator Code",
        "#indicator+value+num": "Value",
    }
)
print("Data Shape: ", data.shape)

# Order rows by indicator and then by year, renumber them, and keep only the
# three columns the rest of the notebook uses.
data = (
    data.sort_values(["Indicator Name", "Date"])
        .reset_index(drop=True)[["Date", "Indicator Name", "Value"]]
)

# Each year becomes the string "<year>-01-01", which is parsed and stored as a
# plain datetime.date object.
data["Date"] = pd.to_datetime(data["Date"].astype(str) + "-01-01").dt.date

# Alternatives the author left disabled:
# data.Date = data.Date.astype(int)
# data.Value = (data.Value - data.Value.min())/(data.Value.max() - data.Value.min())

# NOTE(review): rounding to whole numbers drops the fractional part of every
# indicator value - confirm this is intended for small-valued series.
data["Value"] = data["Value"].round().astype(int)
data.head()

Data Shape:  (1896, 6)


Unnamed: 0,Date,Indicator Name,Value
0,2009-01-01,Access to electricity (% of population),99
1,2010-01-01,Access to electricity (% of population),99
2,2011-01-01,Access to electricity (% of population),100
3,2012-01-01,Access to electricity (% of population),100
4,2013-01-01,Access to electricity (% of population),100


## **EXPLORATORY DATA ANALYSIS**

### **Checking Missing Values**

In [None]:
# Count of missing entries in each column of the cleaned table.
data.isnull().sum()

Date              0
Indicator Name    0
Value             0
dtype: int64

### **Frequency of Indicator Names in Malaysia**

In [None]:
# Number of rows available for each indicator, most frequent first.
data["Indicator Name"].value_counts()

Urban population (% of total population)                                             60
Mortality rate, under-5 (per 1,000 live births)                                      60
Population in urban agglomerations of more than 1 million (% of total population)    60
Agriculture, forestry, and fishing, value added (% of GDP)                           60
Population, total                                                                    60
                                                                                     ..
Ease of doing business index (1=most business-friendly regulations)                   1
GHG net emissions/removals by LUCF (Mt of CO2 equivalent)                             1
Droughts, floods, extreme temperatures (% of population, average 1990-2009)           1
Disaster risk reduction progress score (1-5 scale; 5=best)                            1
Community health workers (per 1,000 people)                                           1
Name: Indicator Name, Length: 64

## **PLOTS**

### **Plotting Functions**

In [None]:
import plotly.express as px

def plot(df, title, name, color, dash):
  """Show an interactive chart of the ``Value`` column of ``df`` against its index.

  The figure combines a scatter layer (marker size and colour both taken from
  ``Value``) with a line trace drawn through the same points.

  Args:
    df: DataFrame with a ``Value`` column; its index is used for the x axis.
    title: Text placed at the top of the figure.
    name: Legend label of the line trace.
    color: Colour of the line trace.
    dash: Dash style of the line trace (e.g. "dot", "dash", "dashdot").

  Returns:
    None. The figure is displayed with ``fig.show()``.
  """
  # An indicator filter that matches no rows yields an empty frame; report it
  # rather than trying to render a chart with nothing in it.
  if df.empty:
    print("No data to plot for: " + title)
    return

  # px.scatter creates the figure itself, so no go.Figure() is needed first.
  # NOTE(review): marker size comes straight from Value; plotly may reject
  # negative marker sizes, so series containing negative values could fail here - confirm.
  fig = px.scatter(df, x=df.index, y="Value", size="Value", color="Value")
  fig.add_trace(
      go.Scatter(
          x=df.index,
          y=df.Value,
          name=name,
          line=dict(color=color, width=2, dash=dash),
      )
  )
  fig.update_xaxes(title_text="Date", rangeslider_visible=True, linewidth=2, linecolor='black', mirror=True)
  fig.update_yaxes(title_text="Value", showline=True, linewidth=2, linecolor='black', mirror=True)
  fig.update_layout(height=500, width=1000, title_text=title, xaxis_tickangle=-90)
  fig.show()

### **Access to electricity (% of population)** 

In [None]:
# Date-indexed series for the share of the population with access to electricity.
df = (
    data.loc[data["Indicator Name"] == "Access to electricity (% of population)", ["Date", "Value"]]
        .set_index("Date")
)

# The labels name the indicator actually plotted (access, not consumption).
plot(df, "Access to electricity (% of population) in Malaysia", "Access to electricity (% of population)", "red", "dashdot")

### **Agricultural irrigated land (% of total agricultural land)** 

In [None]:
# Pull the irrigated-land rows, keep Date/Value, and index the result by date.
df = (
    data.loc[
        data["Indicator Name"] == "Agricultural irrigated land (% of total agricultural land)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Irrigational Land Coverage in Malaysia",
    "Malaysia Irrigated Land",
    "red",
    "dash",
)

### **Arable land (% of land area)** 

In [None]:
# Pull the arable-land rows, keep Date/Value, and index the result by date.
df = (
    data.loc[data["Indicator Name"] == "Arable land (% of land area)", ["Date", "Value"]]
    .set_index("Date")
)

plot(
    df,
    "Arable Land Coverage in Malaysia",
    "Malaysia % Land Coverage",
    "red",
    "dot",
)

### **Agriculture, forestry, and fishing, value added (% of GDP)** 

In [None]:
# Date-indexed series of agriculture, forestry and fishing value added.
df = (
    data.loc[
        data["Indicator Name"] == "Agriculture, forestry, and fishing, value added (% of GDP)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Agriculture, forestry, and fishing, value added (% of GDP) in Malaysia",
    "Agriculture, forestry, and fishing % in Malaysia GDP",
    "red",
    "dot",
)

### **Annual freshwater withdrawals, total (% of internal resources)** 

In [None]:
# Date-indexed series of annual freshwater withdrawals.
df = (
    data.loc[
        data["Indicator Name"] == "Annual freshwater withdrawals, total (% of internal resources)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Annual freshwater withdrawals, total (% of internal resources) in Malaysia",
    "Malaysia % Annual freshwater withdrawals",
    "red",
    "dot",
)

### **Average precipitation in depth (mm per year)** 

In [None]:
# Date-indexed series of average precipitation in depth.
df = (
    data.loc[
        data["Indicator Name"] == "Average precipitation in depth (mm per year)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Average precipitation in depth (mm per year) in Malaysia",
    "Malaysia Average precipitation",
    "red",
    "dot",
)

### **CO2 emissions (kg per 2010 US$ of GDP)** 

In [None]:
# Date-indexed series of CO2 emissions per 2010 US$ of GDP.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions (kg per 2010 US$ of GDP)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions (kg per 2010 US$ of GDP) in Malaysia",
    "CO2 emissions (kg per 2010 US$ of GDP)",
    "red",
    "dot",
)

### **CO2 emissions (kg per 2017 PPP $ of GDP)** 

In [None]:
# Date-indexed series of CO2 emissions per 2017 PPP $ of GDP.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions (kg per 2017 PPP $ of GDP)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions (kg per 2017 PPP $ of GDP) in Malaysia",
    "CO2 emissions (kg per 2017 PPP $ of GDP)",
    "red",
    "dot",
)

### **CO2 emissions (kg per PPP $ of GDP)** 

In [None]:
# Date-indexed series of CO2 emissions per PPP $ of GDP.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions (kg per PPP $ of GDP)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions (kg per PPP $ of GDP) in Malaysia",
    "CO2 emissions (kg per PPP $ of GDP)",
    "red",
    "dot",
)

### **CO2 emissions (kt)** 

In [None]:
# Date-indexed series of total CO2 emissions in kt.
df = (
    data.loc[data["Indicator Name"] == "CO2 emissions (kt)", ["Date", "Value"]]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions (kt) in Malaysia",
    "CO2 emissions (kt)",
    "red",
    "dot",
)

### **CO2 emissions (metric tons per capita)** 

In [None]:
# Date-indexed series of CO2 emissions per capita.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions (metric tons per capita)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions (metric tons per capita) in Malaysia",
    "CO2 emissions (metric tons per capita)",
    "red",
    "dot",
)

### **CO2 emissions from gaseous fuel consumption (% of total)** 

In [None]:
# Date-indexed series of the gaseous-fuel share of CO2 emissions.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions from gaseous fuel consumption (% of total)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions from gaseous fuel consumption (% of total) in Malaysia",
    "CO2 emissions from gaseous fuel consumption (% of total)",
    "red",
    "dot",
)

### **CO2 emissions from gaseous fuel consumption (kt)** 

In [None]:
# Date-indexed series of CO2 emissions from gaseous fuel, in kt.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions from gaseous fuel consumption (kt)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions from gaseous fuel consumption (kt) in Malaysia",
    "CO2 emissions from gaseous fuel consumption (kt)",
    "red",
    "dot",
)

### **CO2 emissions from liquid fuel consumption (% of total)** 

In [None]:
# Date-indexed series of the liquid-fuel share of CO2 emissions.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions from liquid fuel consumption (% of total)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions from liquid fuel consumption (% of total) in Malaysia",
    "CO2 emissions from liquid fuel consumption (% of total)",
    "red",
    "dot",
)

### **CO2 emissions from solid fuel consumption (% of total)** 

In [None]:
# Date-indexed series of the solid-fuel share of CO2 emissions.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions from solid fuel consumption (% of total)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions from solid fuel consumption (% of total) in Malaysia",
    "CO2 emissions from solid fuel consumption (% of total)",
    "red",
    "dot",
)

### **CO2 emissions from solid fuel consumption (kt)** 

In [None]:
# Date-indexed series of CO2 emissions from solid fuel, in kt.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 emissions from solid fuel consumption (kt)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 emissions from solid fuel consumption (kt) in Malaysia",
    "CO2 emissions from solid fuel consumption (kt)",
    "red",
    "dot",
)

### **CO2 intensity (kg per kg of oil equivalent energy use)** 

In [None]:
# Date-indexed series of CO2 intensity of energy use.
df = (
    data.loc[
        data["Indicator Name"] == "CO2 intensity (kg per kg of oil equivalent energy use)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "CO2 intensity (kg per kg of oil equivalent energy use) in Malaysia",
    "CO2 intensity (kg per kg of oil equivalent energy use)",
    "red",
    "dot",
)

### **Foreign direct investment, net inflows (% of GDP)** 

In [None]:
# Date-indexed series of net foreign direct investment inflows.
df = (
    data.loc[
        data["Indicator Name"] == "Foreign direct investment, net inflows (% of GDP)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

# Title no longer carries the stray second ")" after "(% of GDP)".
plot(df, "Foreign direct investment, net inflows (% of GDP) in Malaysia", "Foreign direct investment, net inflows (% of GDP)", "red", "dot")

### **Forest area (% of land area)** 

In [None]:
# Date-indexed series of forest area as a share of land area.
df = (
    data.loc[data["Indicator Name"] == "Forest area (% of land area)", ["Date", "Value"]]
    .set_index("Date")
)

plot(
    df,
    "Forest area (% of land area) in Malaysia",
    "Forest area (% of land area)",
    "red",
    "dot",
)

### **Forest area (sq. km)**

In [None]:
# Date-indexed series of forest area in square kilometres.
df = (
    data.loc[data["Indicator Name"] == "Forest area (sq. km)", ["Date", "Value"]]
        .set_index("Date")
)

# Legend label no longer carries the stray second ")" after "(sq. km)".
plot(df, "Forest area (sq. km) in Malaysia", "Forest area (sq. km)", "red", "dot")

### **HFC gas emissions (thousand metric tons of CO2 equivalent)** 

In [None]:
# Date-indexed series of HFC gas emissions.
df = (
    data.loc[
        data["Indicator Name"] == "HFC gas emissions (thousand metric tons of CO2 equivalent)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "HFC gas emissions (thousand metric tons of CO2 equivalent) in Malaysia",
    "HFC gas emissions (thousand metric tons of CO2 equivalent)",
    "red",
    "dot",
)

### **Marine protected areas (% of territorial waters)** 

In [None]:
# Date-indexed series of marine protected areas.
df = (
    data.loc[
        data["Indicator Name"] == "Marine protected areas (% of territorial waters)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Marine protected areas (% of territorial waters) in Malaysia",
    "Marine protected areas (% of territorial waters)",
    "red",
    "dot",
)

### **Methane emissions (% change from 1990)** 

In [None]:
# df = data[data["Indicator Name"] == "Methane emissions (% change from 1990)"]
# df = df[["Date", "Value"]]
# df.set_index("Date", inplace = True)

# plot(df, "Methane emissions (% change from 1990) in Malaysia", "Methane emissions (% change from 1990)", "red", "dot")

### **Methane emissions (kt of CO2 equivalent)** 

In [None]:
# Date-indexed series of methane emissions in kt of CO2 equivalent.
df = (
    data.loc[
        data["Indicator Name"] == "Methane emissions (kt of CO2 equivalent)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Methane emissions (kt of CO2 equivalent) in Malaysia",
    "Methane emissions (kt of CO2 equivalent)",
    "red",
    "dot",
)

### **Mortality rate, under-5 (per 1,000 live births)**

In [None]:
# Date-indexed series of the under-5 mortality rate.
df = (
    data.loc[
        data["Indicator Name"] == "Mortality rate, under-5 (per 1,000 live births)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Mortality rate, under-5 (per 1,000 live births) in Malaysia",
    "Mortality rate, under-5 (per 1,000 live births)",
    "red",
    "dot",
)

### **Nitrous oxide emissions (% change from 1990)** 

In [None]:
# df = data[data["Indicator Name"] == "Nitrous oxide emissions (% change from 1990)"]
# df = df[["Date", "Value"]]
# df.set_index("Date", inplace = True)

# plot(df, "Nitrous oxide emissions (% change from 1990) in Malaysia", "Nitrous oxide emissions (% change from 1990)", "red", "dot")

### **Nitrous oxide emissions (thousand metric tons of CO2 equivalent)** 

In [None]:
# Date-indexed series of nitrous oxide emissions.
df = (
    data.loc[
        data["Indicator Name"] == "Nitrous oxide emissions (thousand metric tons of CO2 equivalent)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Nitrous oxide emissions (thousand metric tons of CO2 equivalent) in Malaysia",
    "Nitrous oxide emissions (thousand metric tons of CO2 equivalent)",
    "red",
    "dot",
)

### **Other greenhouse gas emissions (% change from 1990)** 

In [None]:
# df = data[data["Indicator Name"] == "Other greenhouse gas emissions (% change from 1990)"]
# df = df[["Date", "Value"]]
# df.set_index("Date", inplace = True)

# plot(df, "Other greenhouse gas emissions (% change from 1990) in Malaysia", "Other greenhouse gas emissions (% change from 1990)", "red", "dot")

### **Other greenhouse gas emissions, HFC, PFC and SF6 (thousand metric tons of CO2 equivalent)** 

In [None]:
# df = data[data["Indicator Name"] == "Other greenhouse gas emissions, HFC, PFC and SF6 (thousand metric tons of CO2 equivalent)"]
# df = df[["Date", "Value"]]
# df.set_index("Date", inplace = True)

# plot(df, "Other greenhouse gas emissions, HFC, PFC and SF6 (thousand metric tons of CO2 equivalent) in Malaysia", "Other greenhouse gas emissions, HFC, PFC and SF6 (thousand metric tons of CO2 equivalent)", "red", "dot")

### **PFC gas emissions (thousand metric tons of CO2 equivalent)** 

In [None]:
# Date-indexed series of PFC gas emissions.
df = (
    data.loc[
        data["Indicator Name"] == "PFC gas emissions (thousand metric tons of CO2 equivalent)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

# Legend label no longer carries the stray second ")" after "equivalent)".
plot(df, "PFC gas emissions (thousand metric tons of CO2 equivalent) in Malaysia", "PFC gas emissions (thousand metric tons of CO2 equivalent)", "red", "dot")

### **Population growth (annual %)** 

In [None]:
# Date-indexed series of annual population growth.
df = (
    data.loc[data["Indicator Name"] == "Population growth (annual %)", ["Date", "Value"]]
    .set_index("Date")
)

plot(
    df,
    "Population growth (annual %) in Malaysia",
    "Population growth (annual %)",
    "red",
    "dot",
)

### **Population in urban agglomerations of more than 1 million (% of total population)** 

In [None]:
# Date-indexed series of population living in large urban agglomerations.
df = (
    data.loc[
        data["Indicator Name"] == "Population in urban agglomerations of more than 1 million (% of total population)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Population in urban agglomerations of more than 1 million (% of total population) in Malaysia",
    "Population in urban agglomerations of more than 1 million (% of total population)",
    "red",
    "dot",
)

### **Population living in areas where elevation is below 5 meters (% of total population)** 

In [None]:
# Date-indexed series of population living below 5 meters elevation.
df = (
    data.loc[
        data["Indicator Name"] == "Population living in areas where elevation is below 5 meters (% of total population)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Population living in areas where elevation is below 5 meters (% of total population) in Malaysia",
    "Population living in areas where elevation is below 5 meters (% of total population)",
    "red",
    "dot",
)

### **Population, total** 

In [None]:
# Date-indexed series of total population.
df = (
    data.loc[data["Indicator Name"] == "Population, total", ["Date", "Value"]]
    .set_index("Date")
)

plot(
    df,
    "Population, total in Malaysia",
    "Population, total",
    "red",
    "dot",
)

### **Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)**

In [None]:
# Date-indexed series of the poverty headcount ratio at $1.90 a day.
df = (
    data.loc[
        data["Indicator Name"] == "Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

# Title no longer carries the stray second ")" after "(% of population)".
plot(df, "Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population) in Malaysia", "Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)", "red", "dot")

### **Prevalence of underweight, weight for age (% of children under 5)** 

In [None]:
# Date-indexed series of underweight prevalence in children under 5.
df = (
    data.loc[
        data["Indicator Name"] == "Prevalence of underweight, weight for age (% of children under 5)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Prevalence of underweight, weight for age (% of children under 5) in Malaysia",
    "Prevalence of underweight, weight for age (% of children under 5)",
    "red",
    "dot",
)

### **Primary completion rate, total (% of relevant age group)** 

In [None]:
# Date-indexed series of the primary completion rate.
df = (
    data.loc[
        data["Indicator Name"] == "Primary completion rate, total (% of relevant age group)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Primary completion rate, total (% of relevant age group) in Malaysia",
    "Primary completion rate, total (% of relevant age group)",
    "red",
    "dot",
)

### **Renewable energy consumption (% of total final energy consumption)** 

In [None]:
# Date-indexed series of renewable energy consumption.
df = (
    data.loc[
        data["Indicator Name"] == "Renewable energy consumption (% of total final energy consumption)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Renewable energy consumption (% of total final energy consumption) in Malaysia",
    "Renewable energy consumption (% of total final energy consumption)",
    "red",
    "dot",
)

### **SF6 gas emissions (thousand metric tons of CO2 equivalent)** 

In [48]:
# df = data[data["Indicator Name"] == "SF6 gas emissions (thousand metric tons of CO2 equivalent)"]
# df = df[["Date", "Value"]]
# df.set_index("Date", inplace = True)

# plot(df, "SF6 gas emissions (thousand metric tons of CO2 equivalent) in Malaysia", "SF6 gas emissions (thousand metric tons of CO2 equivalent)", "red", "dot")

### **Total greenhouse gas emissions (% change from 1990)** 

In [None]:
# df = data[data["Indicator Name"] == "Total greenhouse gas emissions (% change from 1990)"]
# df = df[["Date", "Value"]]
# df.set_index("Date", inplace = True)

# plot(df, "Total greenhouse gas emissions (% change from 1990) in Malaysia", "Total greenhouse gas emissions (% change from 1990)", "red", "dot")

### **Urban population (% of total population)** 

In [None]:
# Date-indexed series of the urban share of total population.
df = (
    data.loc[
        data["Indicator Name"] == "Urban population (% of total population)",
        ["Date", "Value"],
    ]
    .set_index("Date")
)

plot(
    df,
    "Urban population (% of total population) in Malaysia",
    "Urban population (% of total population)",
    "red",
    "dot",
)