In [1]:
import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from imblearn.over_sampling import SMOTE
from IPython.display import display
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load the woodchuck dataset and replace every missing value with 0.
pop = pd.read_csv(
    r'C:\Users\User\Desktop\EDurHack\Dataset\cleanData\woodchucks_with_wood_volume.csv'
).fillna(0)

# Chronologically ordered view used by the widgets and the forecast handler.
yearly_data = pop.sort_values('year')

# Latitude selector: one entry per distinct latitude in the dataset.
# `value=None` means nothing is selected until the user picks an entry.
lat = widgets.Dropdown(
    options=yearly_data['latitude'].unique().tolist(),
    description='Latitude:',
    disabled=False,
    value=None
)

# Longitude selector: starts empty and is filled by `update_lon_options`
# once a latitude has been chosen.
# (A previous statement here compared the latitude column against the `lat`
# widget object itself and stored a result that was never read; removed.)
lon = widgets.Dropdown(
    options=[],
    description='Longitude:',
    disabled=False,
    value=None
)

def update_lon_options(change):
    """Refresh the longitude dropdown after the latitude selection changes.

    Args:
        change: Change notification mapping; only ``change['new']`` (the newly
            selected latitude, or ``None``) is read.

    Side effects:
        Rewrites ``lon.options`` with the distinct longitudes recorded in
        ``yearly_data`` for that latitude and sets ``lon.value`` to the first
        of them. Both are cleared when there is no latitude or no match.
    """
    chosen_lat = change['new']

    # Nothing selected: empty the longitude dropdown and stop.
    if chosen_lat is None:
        lon.options = []
        lon.value = None
        return

    at_latitude = yearly_data['latitude'] == chosen_lat
    available = yearly_data.loc[at_latitude, 'longitude'].unique().tolist()

    lon.options = available
    lon.value = available[0] if available else None

# Keep the longitude choices in sync with whichever latitude is selected.
lat.observe(update_lon_options, names='value')

# Selector for the number of years to forecast: 1 through 50, default 1.
year = widgets.Dropdown(
    description='Year:',
    options=[*range(1, 51)],
    value=1,
    disabled=False,
)

# Button that triggers the forecast.
forecastButton = widgets.Button(
    description='Generate Forecast',
    tooltip='Click to forecast',
    icon='chart-line',
    button_style='success',
    disabled=False,
)

# Output area that receives the forecast results.
output = widgets.Output()

In [None]:
# Column names shared by training and recursive inference so the two stay aligned.
_FEATURE_COLUMNS = ['year', 'time_index', 'lag_1', 'lag_2', 'lag_3', 'rolling_mean_3']
_TARGET_COLUMN = 'total_wood_chucked_lbs'


def _build_training_frame(location_rows):
    """Return `location_rows` sorted by year with the model feature columns added.

    Every feature is derived only from values that precede the row's own
    target, which is exactly the information available when forecasting:

    * ``lag_1``..``lag_3``: the target 1, 2 and 3 rows earlier. When ``lag_2``
      or ``lag_3`` does not exist yet it falls back to ``lag_1``, mirroring
      the fallback used at inference time.
    * ``rolling_mean_3``: mean of up to the three previous targets.

    The first row has no history at all, so its lag/rolling features stay NaN;
    the caller is expected to drop it before fitting.
    """
    frame = location_rows.sort_values('year').reset_index(drop=True)
    target = frame[_TARGET_COLUMN]

    frame['time_index'] = range(len(frame))
    frame['lag_1'] = target.shift(1)
    frame['lag_2'] = target.shift(2).fillna(frame['lag_1'])
    frame['lag_3'] = target.shift(3).fillna(frame['lag_1'])
    # shift(1) first so the window never contains the row's own target.
    frame['rolling_mean_3'] = target.shift(1).rolling(window=3, min_periods=1).mean()
    return frame


def _recursive_forecast(model, history, last_year, last_time_index, horizon):
    """Predict `horizon` steps ahead, feeding each prediction back in as a lag."""
    recent = list(history[-3:])
    predictions = []
    for step in range(1, horizon + 1):
        newest = recent[-1]
        features = {
            'year': last_year + step,
            'time_index': last_time_index + step,
            'lag_1': newest,
            'lag_2': recent[-2] if len(recent) >= 2 else newest,
            'lag_3': recent[-3] if len(recent) >= 3 else newest,
            'rolling_mean_3': np.mean(recent),
        }
        # Explicit column order: must match the order used for fitting.
        future_X = pd.DataFrame([features], columns=_FEATURE_COLUMNS)
        pred = model.predict(future_X)[0]
        predictions.append(pred)
        # Slide the window: keep at most the three most recent values.
        recent = (recent + [pred])[-3:]
    return predictions


def forecast(b):
    """Button callback: fit a random forest for the selected location and plot a forecast.

    Reads the current selection from the ``lat``, ``lon`` and ``year`` widgets,
    trains a ``RandomForestRegressor`` on that location's history in
    ``yearly_data`` and draws the history together with a recursive
    multi-year forecast inside the ``output`` widget.

    Args:
        b: The clicked button, as passed by ``Button.on_click``; unused.

    Returns:
        None. A short message is printed into ``output`` instead of a plot
        when no location is selected or there is too little history.
    """
    with output:
        output.clear_output()

        selected_lat = lat.value
        selected_lon = lon.value
        forecast_years = year.value

        # Both dropdowns start unselected; without this guard the handler
        # would end in an exception instead of a readable message.
        if selected_lat is None or selected_lon is None:
            print('Please select a latitude and a longitude before generating a forecast.')
            return

        location_rows = yearly_data[
            (yearly_data['latitude'] == selected_lat) &
            (yearly_data['longitude'] == selected_lon)
        ].copy()
        location_data = _build_training_frame(location_rows)

        # Rows without any history carry NaN features; drop them so the fit
        # does not depend on the estimator's NaN handling.
        train = location_data.dropna(subset=_FEATURE_COLUMNS)
        if train.empty:
            print('Not enough historical data for this location to build a forecast '
                  '(at least 2 yearly records are required).')
            return

        model = RandomForestRegressor(
            n_estimators=100,
            max_depth=10,
            min_samples_split=2,
            random_state=42,
            n_jobs=-1
        )
        model.fit(train[_FEATURE_COLUMNS], train[_TARGET_COLUMN])

        last_year = location_data['year'].iloc[-1]
        last_time_index = location_data['time_index'].iloc[-1]
        future_predictions = _recursive_forecast(
            model,
            location_data[_TARGET_COLUMN].to_numpy(),
            last_year,
            last_time_index,
            forecast_years,
        )

        fig, ax = plt.subplots(figsize=(14, 8))

        ax.plot(location_data['year'], location_data[_TARGET_COLUMN],
                label='Historical Data', linewidth=2.5, marker='o', color='black', markersize=7)

        future_years = [last_year + step for step in range(1, forecast_years + 1)]
        ax.plot(future_years, future_predictions, label=f'{forecast_years}-Year Forecast',
                linewidth=3, linestyle=':', marker='D', color='green', markersize=8)

        # Vertical marker at the last observed year, where the forecast begins.
        ax.axvline(x=last_year, color='red', linestyle='-', alpha=0.3,
                   linewidth=2, label='Forecast Start')

        ax.set_xlabel('Year', fontsize=13)
        ax.set_ylabel('Estimated Wood Chucked', fontsize=13)
        ax.set_title(f'Amount of Wood Chucked Forecast (Random Forest)\nLocation: ({selected_lat:.4f}, {selected_lon:.4f})',
                     fontsize=15, fontweight='bold')
        ax.legend(loc='best', fontsize=11)
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

# Run `forecast` whenever the 'Generate Forecast' button is clicked.
# NOTE(review): re-running this cell creates a new `forecast` object, which
# presumably gets registered in addition to the old one — confirm.
forecastButton.on_click(forecast)


In [11]:
# Banner. The beaver emoji (U+1F9AB) is written as an ASCII escape so it
# cannot be corrupted when the notebook file is re-encoded.
print("\U0001F9AB WOODCHUCK POPULATION FORECASTING TOOL")
print("Select a location and click 'Generate Forecast'\n")

# Layout: the three selectors on one row, then the button, then the output area.
display(widgets.VBox([
    widgets.HBox([lat, lon, year]),
    forecastButton,
    output
]))

🦫 WOODCHUCK POPULATION FORECASTING TOOL
Select a location and click 'Generate Forecast'



VBox(children=(HBox(children=(Dropdown(description='Latitude:', options=(39.9, 40.7, 42.2, 40.0, 41.2, 41.0, 4…