In [8]:
import pandas as pd
#importing data from new file 
climate_data = pd.read_csv("DataSet/climate_change.csv")

# Sort the dataset by year
climate_data = climate_data.sort_values(by='year')
# 
# Check for missing values
missing_values_before_imputation = climate_data.isna().sum()
print("\nMissing values beforeimputation:")
print(missing_values_before_imputation)

# Columns to impute
columns_to_impute = [
    'DrySoilDays_Summer_whole', 'Evap_Summer', 'ExtremeShortTermDryStress_Summer_whole',
    'FrostDays_Winter', 'NonDrySWA_Summer_whole', 'PPT_Winter', 'PPT_Summer', 'PPT_Annual',
    'T_Winter', 'T_Summer', 'T_Annual', 'VWC_Winter_whole', 'VWC_Spring_whole',
    'VWC_Summer_whole', 'VWC_Fall_whole'
]

# Function to impute missing values by grouping by year and scenario
def impute_missing_values(data, columns):
    for column in columns:
        data[column] = data.groupby(['year', 'scenario', 'long', 'lat'])[column].transform(lambda x: x.fillna(x.mean()))
    return data
#, 'treecanopy','Ann_Herb','Bare','Herb','Litter','Shrub'
# Impute missing values
climate_data_imputed = impute_missing_values(climate_data, columns_to_impute)

# Check for remaining missing values
missing_values_after_imputation = climate_data_imputed.isna().sum()
print("\nMissing values after imputation:")
print(missing_values_after_imputation)

# Columns to impute using max and min
columns_to_impute_extremes = {
    'Tmax_Summer': 'max',
    'Tmin_Winter': 'min'
}

# Function to impute missing values by grouping by year and scenario using max and min
def impute_missing_values_extremes(data, columns):
    for column, func in columns.items():
        if func == 'max':
            data[column] = data.groupby(['year', 'scenario', 'long', 'lat'])[column].transform(lambda x: x.fillna(x.max()))
        elif func == 'min':
            data[column] = data.groupby(['year', 'scenario', 'long', 'lat'])[column].transform(lambda x: x.fillna(x.min()))
    return data

# Impute missing values using max and min
climate_data_imputed = impute_missing_values_extremes(climate_data_imputed, columns_to_impute_extremes)

# Check for remaining missing values
missing_values_after_imputation = climate_data_imputed.isna().sum()
print("\nMissing values after imputation:")
print(missing_values_after_imputation)

# Save the imputed data to a new CSV file
climate_data_imputed.to_csv("DataSet/climate_change_imputed.csv", index=False)
print("\nThe imputed data has been saved to 'climate_change_imputed.csv'.")


  climate_data = pd.read_csv("DataSet/climate_change.csv")



Missing values beforeimputation:
long                                          0
lat                                           0
year                                          0
TimePeriod                                    0
RCP                                           0
scenario                                      0
treecanopy                                    0
Ann_Herb                                      0
Bare                                          0
Herb                                          0
Litter                                        0
Shrub                                         0
DrySoilDays_Summer_whole                  46874
Evap_Summer                               46874
ExtremeShortTermDryStress_Summer_whole    46880
FrostDays_Winter                          46874
NonDrySWA_Summer_whole                    46998
PPT_Winter                                22288
PPT_Summer                                22288
PPT_Annual                                29693
T_Wint

In [7]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd

app = Dash(__name__)

# Load the dataset
climate_data = pd.read_csv("DataSet/climate_change.csv")

# Check for missing values before imputation
missing_values_before_imputation = climate_data.isna().sum()

# Columns to impute
columns_to_impute = [
    'DrySoilDays_Summer_whole', 'Evap_Summer', 'ExtremeShortTermDryStress_Summer_whole',
    'FrostDays_Winter', 'NonDrySWA_Summer_whole', 'PPT_Winter', 'PPT_Summer', 'PPT_Annual',
    'T_Winter', 'T_Summer', 'T_Annual', 'VWC_Winter_whole', 'VWC_Spring_whole',
    'VWC_Summer_whole', 'VWC_Fall_whole'
]

# Function to impute missing values by grouping by year and scenario
def impute_missing_values(data, columns):
    for column in columns:
        data[column] = data.groupby(['year', 'scenario'])[column].transform(lambda x: x.fillna(x.mean()))
    return data

# Impute missing values
climate_data_imputed = impute_missing_values(climate_data, columns_to_impute)

# Columns to impute using max and min
columns_to_impute_extremes = {
    'Tmax_Summer': 'max',
    'Tmin_Winter': 'min'
}

# Function to impute missing values by grouping by year and scenario using max and min
def impute_missing_values_extremes(data, columns):
    for column, func in columns.items():
        if func == 'max':
            data[column] = data.groupby(['year', 'scenario'])[column].transform(lambda x: x.fillna(x.max()))
        elif func == 'min':
            data[column] = data.groupby(['year', 'scenario'])[column].transform(lambda x: x.fillna(x.min()))
    return data

# Impute missing values using max and min
climate_data_imputed = impute_missing_values_extremes(climate_data_imputed, columns_to_impute_extremes)

# Check for remaining missing values after imputation
missing_values_after_imputation = climate_data_imputed.isna().sum()

# Combine missing values data for plotting
missing_values_df = pd.DataFrame({
    'Variable': missing_values_before_imputation.index,
    'Missing Before': missing_values_before_imputation.values,
    'Missing After': missing_values_after_imputation.values
})

app.layout = html.Div([
    html.H4('Missing Values Before and After Imputation'),
    dcc.Dropdown(
        id="dropdown",
        options=[{"label": col, "value": col} for col in missing_values_df['Variable']],
        value=missing_values_df['Variable'][0],
        clearable=False,
    ),
    dcc.Graph(id="graph"),
])

@app.callback(
    Output("graph", "figure"), 
    Input("dropdown", "value"))
def update_bar_chart(column):
    filtered_df = missing_values_df[missing_values_df['Variable'] == column]
    fig = px.bar(filtered_df.melt(id_vars='Variable', value_vars=['Missing Before', 'Missing After']),
                 x='Variable', y='value', color='variable',
                 color_discrete_map={"Missing Before": "#89c77f", "Missing After": "#6e3824"},
                 labels={'value': 'Missing Values', 'variable': 'Imputation Status'})
    fig.update_layout(barmode='group')
    return fig

if __name__ == '__main__':
    app.run_server(debug=True)



Columns (4) have mixed types. Specify dtype option on import or set low_memory=False.



In [11]:
import pandas as pd
import plotly.graph_objects as go

# Load the dataset
climate_data = pd.read_csv("DataSet/climate_change.csv")

# Select relevant columns for the spider plot
columns_precipitation = [
    'PPT_Summer',
    'PPT_Winter',
    'PPT_Annual'
]

columns_temperature = [
    'T_Summer',
    'T_Winter',
    'T_Annual'
]

# Ensure there are no missing values for the selected columns (or handle them appropriately)
climate_data_selected_precipitation = climate_data[columns_precipitation].dropna()
climate_data_selected_temperature = climate_data[columns_temperature].dropna()

# Calculate the mean values for the selected columns
mean_values_precipitation = climate_data_selected_precipitation.mean()
mean_values_temperature = climate_data_selected_temperature.mean()

# Create the spider plot
fig = go.Figure()

# Add trace for precipitation
fig.add_trace(go.Scatterpolar(
    r=mean_values_precipitation,
    theta=['Summer', 'Winter', 'Annual'],
    fill='toself',
    line=dict(color='#89c77f'),  # Green color for the line
    name='Precipitation'
))

# Add trace for temperature
fig.add_trace(go.Scatterpolar(
    r=mean_values_temperature,
    theta=['Summer', 'Winter', 'Annual'],
    fill='toself',
    line=dict(color='#6e3824'),  # Brown color for the line
    name='Temperature'
))

# Update the layout with a green and brown theme
fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, max(max(mean_values_precipitation), max(mean_values_temperature))*1.1]
        )
    ),
    showlegend=True,
    title="Precipitation and Temperature Characteristics",
    plot_bgcolor='#dfe8dc',  # Light green background color
    paper_bgcolor='#dfe8dc',  # Light green background color
    font=dict(color='#6e3824')  # Brown color for the text
)

# Show the plot
fig.show()



Columns (4) have mixed types. Specify dtype option on import or set low_memory=False.



In [12]:
import pandas as pd
import plotly.express as px

# Load the dataset
climate_data = pd.read_csv("DataSet/climate_change.csv")

# Ensure relevant columns are present and filter the data accordingly
columns = [
    'year', 'month', 'PPT_Summer', 'PPT_Winter', 'PPT_Annual', 
    'T_Summer', 'T_Winter', 'T_Annual'
]

# Assuming the dataset has 'year' and 'month' columns for creating a time series heatmap
climate_data_filtered = climate_data[columns].dropna()

# Melt the dataframe for a heatmap suitable format
climate_data_melted = climate_data_filtered.melt(
    id_vars=['year', 'month'], 
    value_vars=['PPT_Summer', 'PPT_Winter', 'PPT_Annual', 'T_Summer', 'T_Winter', 'T_Annual'],
    var_name='Metric', 
    value_name='Value'
)

# Create the heatmap using Plotly Express
fig = px.density_heatmap(
    climate_data_melted, 
    x='year', 
    y='month', 
    z='Value',
    facet_col='Metric',
    color_continuous_scale=['#dfe8dc', '#6e3824'],  # Green and brown theme
    title='Climate Characteristics Heatmap'
)

# Update layout to enhance the visualization
fig.update_layout(
    height=800,  # Adjust height for better readability
    plot_bgcolor='#dfe8dc',  # Light green background color
    paper_bgcolor='#dfe8dc',  # Light green background color
    font=dict(color='#6e3824')  # Brown color for the text
)

# Show the plot
fig.show()



Columns (4) have mixed types. Specify dtype option on import or set low_memory=False.



KeyError: "['month'] not in index"