In [None]:
from pysal.model import spreg
from pysal.lib import weights
from scipy import stats
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import osmnx as ox

In [None]:
!pip uninstall -y neuralforecast
!pip install neuralforecast

In [None]:
# Load the historical biomass table and preview its first rows.
data = pd.read_csv("/kaggle/input/shellai-data/dataset/Biomass_History.csv")
data.head()

In [None]:
# Read the distance matrix, discard its first column and keep the rest as a NumPy array.
distance = (
    pd.read_csv("/kaggle/input/shellai-data/dataset/Distance_Matrix.csv")
    .iloc[:, 1:]
    .to_numpy()
)


**Plain LSTM training and hyperparameter optimization**

In [None]:
# Reshape the wide yearly table into the long (unique_id, ds, y) layout.
melted_df = data[["Index", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017"]].melt(
    id_vars=['Index'], var_name='ds', value_name='y'
)
melted_df = melted_df.rename(columns={'Index': 'unique_id'})

# Map every year label to its rank in sorted order (earliest year -> 0).
year_mapping = {
    year_label: position
    for position, year_label in enumerate(sorted(melted_df['ds'].unique()))
}

# Replace year values with mapped integer values
melted_df['ds'] = melted_df['ds'].replace(year_mapping)
melted_df.head()

In [None]:
# Hold out the last period (ds == 7) for validation and train on everything before it.
# Explicit copies keep later column assignments on `train`/`valid` from hitting
# pandas' SettingWithCopyWarning.
train = melted_df.loc[melted_df['ds'] < 7].copy()
valid = melted_df.loc[melted_df['ds'] == 7].copy()
# Forecast horizon = number of distinct held-out periods.
h = valid['ds'].nunique()

In [None]:
# Non-null count per column of the validation frame (call the method, not just reference it).
valid.count()

In [None]:
# Number of rows in the training frame.
print(train.shape[0])

In [None]:
from neuralforecast import NeuralForecast
from neuralforecast.models import LSTM
from neuralforecast.losses.pytorch import DistributionLoss

# Baseline probabilistic LSTM (Normal likelihood, 90% interval).
# NOTE(review): neuralforecast appears to count early_stop_patience_steps in validation
# checks rather than training steps — confirm that 2*76 is the intended patience.
models = [
    LSTM(
        h=h,
        # architecture
        context_size=2,
        encoder_n_layers=2,
        encoder_hidden_size=200,
        decoder_layers=2,
        decoder_hidden_size=200,
        # objective and input scaling
        loss=DistributionLoss(distribution='Normal', level=[90]),
        scaler_type='standard',
        # optimisation schedule
        learning_rate=1e-3,
        max_steps=50 * 76,
        val_check_steps=76,
        valid_batch_size=32,
        early_stop_patience_steps=2 * 76,
    )
]

model = NeuralForecast(models=models, freq='Y')
model.fit(train, val_size=1)

In [None]:
# Forecast the held-out period and attach the observed `y` for scoring.
p = (
    model.predict(futr_df=valid)
    .reset_index()
    .merge(valid[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')
)

In [None]:
from sklearn.metrics import mean_absolute_error
# Validation MAE of the LSTM point forecast. The preview is the cell's last
# expression so the notebook actually displays it.
loss = mean_absolute_error(p['y'], p['LSTM'])
print(loss)
p.head()

In [None]:
!pip install optuna

In [None]:
import torch
# Report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())
# NOTE(review): this install runs after torch has already been imported above —
# confirm it is still needed.
!pip install torch

In [None]:

from neuralforecast import NeuralForecast
from neuralforecast.models import LSTM
from neuralforecast.losses.pytorch import DistributionLoss
from sklearn.metrics import mean_absolute_error
def objective(trial):
    """Optuna objective for the LSTM: sample hyperparameters, train, and score.

    Relies on the notebook globals ``h``, ``train`` and ``valid``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean absolute error between ``valid['y']`` and the ``LSTM`` forecast column.
    """
    encoder_n_layers = trial.suggest_int('encoder_n_layers', 1, 3)
    encoder_hidden_size = trial.suggest_categorical('encoder_hidden_size', [64, 128, 256])
    decoder_layers = trial.suggest_int('decoder_layers', 1, 3)
    decoder_hidden_size = trial.suggest_categorical('decoder_hidden_size', [64, 128, 256])
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    context_size = trial.suggest_int('context_size', 1, 5)
    scaler_type = trial.suggest_categorical('scaler_type', ['standard', 'robust'])
    models = [LSTM(h=h,
                   loss=DistributionLoss(distribution='Normal', level=[90]),
                   max_steps=50*76,
                   encoder_n_layers=encoder_n_layers,
                   encoder_hidden_size=encoder_hidden_size,
                   context_size=context_size,
                   decoder_hidden_size=decoder_hidden_size,
                   early_stop_patience_steps = 2*76,
                   valid_batch_size = 32,
                   val_check_steps = 76,
                   decoder_layers=decoder_layers,
                   learning_rate=learning_rate,
                   scaler_type=scaler_type,
                   )]

    model = NeuralForecast(models=models, freq='Y')
    model.fit(train, val_size = 1)

    # Forecast the held-out period and line the predictions up with the observed values.
    p = model.predict(futr_df=valid).reset_index()
    p = p.merge(valid[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')

    loss = mean_absolute_error(p['y'], p['LSTM']) 

    return loss

# optuna is only imported further down the notebook, so import it here to keep
# this cell runnable on a fresh kernel.
import optuna

# Minimise the validation MAE returned by `objective` over 30 trials.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

In [None]:
# Hyperparameters of the best trial found by the study.
study.best_params

In [None]:

from neuralforecast import NeuralForecast
from neuralforecast.models import LSTM
from neuralforecast.losses.pytorch import DistributionLoss, MAE
from sklearn.metrics import mean_absolute_error
# LSTM refit with hand-picked settings, scored on the held-out period.
models = [LSTM(h=h,
               loss=DistributionLoss(distribution='Normal', level=[90]),
               max_steps=2*76,
               encoder_n_layers=1,
               encoder_hidden_size=7,
               context_size=100,
               decoder_hidden_size=7,
               # The patience is an integer count, so 0.5*76 is cast instead of passing 38.0.
               early_stop_patience_steps=int(0.5*76),
               valid_batch_size=32,
               val_check_steps=76,
               decoder_layers=1,
               learning_rate=0.0008324051764680218,
               scaler_type='robust',
               )]

model = NeuralForecast(models=models, freq='Y')
model.fit(train, val_size=1)

# Forecast the held-out period and attach the observed values.
p = model.predict(futr_df=valid).reset_index()
p = p.merge(valid[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')

loss = mean_absolute_error(p['y'], p['LSTM'])
print(loss)

In [None]:
# Preview the forecasts merged with the observed values.
p.head()

# N-BEATS forecast

In [None]:
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS 
from neuralforecast.losses.pytorch import DistributionLoss

# N-BEATS with an identity stack followed by a trend stack and a Poisson likelihood.
models = [NBEATS(h=h, input_size=3,
                 loss=DistributionLoss(distribution='Poisson', level=[90]),
                 max_steps=5*76,
                 stack_types=['identity', 'trend'],
                 val_check_steps=76,
                 n_blocks=[16, 8],
                 scaler_type='standard',
                 # "auto" uses a GPU when one is present and falls back to CPU otherwise,
                 # so the cell no longer fails on CPU-only kernels.
                 accelerator="auto"
)]

model = NeuralForecast(models=models, freq='Y')
model.fit(train, val_size=2)

# Forecast the held-out period and attach the observed values.
p = model.predict(futr_df=valid).reset_index()
p = p.merge(valid[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')
p.head()

In [None]:
from sklearn.metrics import mean_absolute_error
import optuna

def objective(trial):
    """Optuna objective for N-BEATS: sample hyperparameters, train, and score.

    Relies on the notebook globals ``h``, ``train`` and ``valid``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean absolute error between ``valid['y']`` and the ``NBEATS`` forecast column.
    """
    input_size = trial.suggest_int('input_size', 1, 6)

    n_blocks_trend = trial.suggest_int('n_blocks_trend', 1, 3)
    n_blocks_identity = trial.suggest_int('n_blocks_ident', 1, 3)

    mlp_units_n = trial.suggest_categorical('mlp_units', [8, 16, 32, 64, 128])
    num_hidden = trial.suggest_int('num_hidden', 1, 3)

    n_harmonics = trial.suggest_int('n_harmonics', 0, 2)
    n_polynomials = trial.suggest_int('n_polynomials', 1, 5)

    scaler_type = trial.suggest_categorical('scaler_type', ['standard', 'robust'])
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    # Block counts are listed in the same order as stack_types below (trend, identity).
    n_blocks = [n_blocks_trend, n_blocks_identity]
    mlp_units=[[mlp_units_n, mlp_units_n]]*num_hidden
    models = [NBEATS(h=h,input_size=input_size,
                 loss=DistributionLoss(distribution='Poisson', level=[90]),
                 max_steps=5*76,
                 stack_types=['trend', 'identity'],
                 mlp_units=mlp_units,
                 n_blocks=n_blocks,
                 learning_rate=learning_rate,
                 n_harmonics=n_harmonics,
                 n_polynomials=n_polynomials,
                 scaler_type=scaler_type)
                 ]
    model = NeuralForecast(models=models, freq='Y')
    model.fit(train)

    # Forecast the held-out period and line the predictions up with the observed values.
    p = model.predict(futr_df=valid).reset_index()
    p = p.merge(valid[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')

    loss = mean_absolute_error(p['y'], p['NBEATS']) 

    return loss
# Minimise the validation MAE returned by `objective` over 30 trials.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

Less complexity = better performance


In [None]:
# MAE of the NBEATS column in the notebook-level `p`; the `p` built inside
# `objective` is local to that function and is not what is scored here.
loss = mean_absolute_error(p['y'], p['NBEATS']) 
print(loss)

In [None]:

# Map the integer period codes 0..7 back to year timestamps 2010..2017.
integer_to_year = {code: pd.Timestamp(str(2010 + code)) for code in range(8)}
# assign() returns new frames, avoiding chained assignment on slices of the melted table.
train = train.assign(ds=train['ds'].replace(integer_to_year))
valid = valid.assign(ds=valid['ds'].replace(integer_to_year))



In [None]:
# Build point geometries from the coordinate columns, then promote the table to a
# GeoDataFrame with CRS EPSG:4326.
data["geometry"] = gpd.points_from_xy(x=data["Longitude"], y=data["Latitude"])
data = gpd.GeoDataFrame(data, crs="epsg:4326")
data.head()

In [None]:
import matplotlib.pyplot as plt
# Scatter of the site locations, labelled through the Axes returned by the plot call.
ax = data.plot(marker='o', color='blue', markersize=10)
ax.set_title('Spatial Distribution of Points')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

In [None]:
import pysal.lib as ps
from pysal.explore import esda

# Calculate spatial weights matrix
# NOTE(review): `data` holds point geometries (built with points_from_xy); Queen
# contiguity is normally defined for polygons — confirm the resulting weights are
# not all islands, or consider the KNN alternative kept below.
w = ps.weights.Queen.from_dataframe(data)
# ps.weights.KNN.from_dataframe(data, k = 5)
print(w)
# Calculate Moran's I
# Global Moran's I of the 2016 column under the weights built above.
moran = esda.Moran(data['2016'], w)

print("Moran's I:", moran.I)
print("Moran's p-value:", moran.p_sim)

In [None]:
# Moran scatterplot for the statistic computed earlier.
# NOTE(review): moran_scatterplot is normally provided by splot.esda rather than
# esda — confirm that the installed esda exposes this attribute.
esda.moran_scatterplot(moran, aspect_equal=True)
plt.show()

In [None]:
import contextily
# Quantile map (5 classes) of the 2016 values drawn over a terrain basemap.
f, ax = plt.subplots(nrows=1, figsize=(9, 9))
data.plot(
    ax=ax,
    column="2016",
    scheme="quantiles",
    k=5,
    cmap="viridis",
    alpha=0.75,
    edgecolor="white",
    linewidth=0.0,
    legend=True,
    legend_kwds={"loc": 2},
)
# NOTE(review): confirm the Stamen provider is still available in the installed
# contextily/xyzservices version.
contextily.add_basemap(
    ax,
    source=contextily.providers.Stamen.TerrainBackground,
    crs=data.crs,
)
ax.set_axis_off()

In [None]:
# Spatial-lag model of the 2016 values on columns 5..8 of `data` (labelled 2012-2015),
# using the weights `w`. The dependent variable is labelled after the column it is
# actually taken from instead of the unrelated 'ln(price)'.
m3 = spreg.GM_Lag(data[['2016']].values, data.iloc[:, 5:9].values,
                  w=w,
                  name_y='2016', name_x=['2012', '2013', '2014', '2015'])

In [None]:
# Text summary of the fitted spatial-lag model.
print(m3.summary)

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the spatial-lag model's `predy_e` against the 2016 column.
# The metric is called by its real name; aliasing it as `mse` mislabelled an MAE
# as a mean squared error.
print(mean_absolute_error(data["2016"], m3.predy_e))

In [None]:
import pysal.lib as ps
from pysal.explore import esda

# Rebuild the Queen weights, then add a "<column>_lagged" spatial lag for each of
# the columns at positions 3..10 of `data`.
w = ps.weights.Queen.from_dataframe(data)
columns_to_lag = data.columns[3:11]

for source_col in columns_to_lag:
    data[f'{source_col}_lagged'] = ps.weights.spatial_lag.lag_spatial(w, data[source_col].values)
data.head()

In [None]:
# Year labels "2010" .. "2017" that have both an original and a lagged column.
years = [str(year) for year in range(2010, 2018)]

# Wide frames of original and spatially lagged values, keyed by unique_id.
original_df = data[["Index", *years]].rename(columns={'Index': 'unique_id'})
lagged_df = data[["Index", *(f"{year}_lagged" for year in years)]].rename(columns={'Index': 'unique_id'})

# Long format: one row per (unique_id, year).
melted_original = original_df.melt(id_vars=['unique_id'], var_name='ds', value_name='y')
melted_lagged = lagged_df.melt(id_vars=['unique_id'], var_name='ds', value_name='y_lagged')

melted_original_selected = melted_original[['unique_id', 'ds', 'y']]

# Both melts come from the same rows and year order, so the lagged values are
# attached column-wise by position.
melted_combined = pd.concat([melted_original_selected, melted_lagged['y_lagged']], axis=1)

# Encode each year label as its rank in sorted order (earliest year -> 0).
year_mapping = {
    year_label: position
    for position, year_label in enumerate(sorted(melted_combined['ds'].unique()))
}
melted_combined['ds'] = melted_combined['ds'].replace(year_mapping)
melted_combined.head()

In [None]:
# Train on periods before 7, hold out period 7; the horizon is the number of
# held-out periods.
train = melted_combined[melted_combined['ds'] < 7]
valid = melted_combined[melted_combined['ds'] == 7]
h = valid['ds'].nunique()

In [None]:
# (rows, columns) of the validation frame.
valid.shape

In [None]:
# Column dtypes and non-null counts of the training frame (call the method, not just reference it).
train.info()

# TFT


In [None]:
from neuralforecast import NeuralForecast
from neuralforecast.models import TFT
from neuralforecast.losses.pytorch import MQLoss, DistributionLoss, GMM, PMM
from sklearn.metrics import mean_absolute_error

# TFT with a two-step horizon, fitted on the full combined table, with the spatial
# lag supplied as a future exogenous feature.
models = [
    TFT(
        h=2,
        input_size=3,
        futr_exog_list=['y_lagged'],
        # network size and regularisation
        hidden_size=8,
        n_head=4,
        attn_dropout=0.3,
        # Student-t likelihood with 80% and 90% intervals.
        # Alternatives kept for reference: distribution='Poisson' or 'Normal' with the
        # same levels; early_stop_patience_steps=10.
        loss=DistributionLoss(distribution='StudentT', level=[80, 90]),
        scaler_type='robust',
        # optimisation schedule
        learning_rate=0.005,
        max_steps=500,
        val_check_steps=10,
        windows_batch_size=None,
        enable_progress_bar=True,
    ),
]

model = NeuralForecast(models=models, freq='Y')
model.fit(melted_combined)

# p = model.predict(futr_df=valid).reset_index()
# p = p.merge(valid[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')

# loss = mean_absolute_error(p['y'], p['TFT']) 
# print(loss)



In [None]:
# Keep only the biomass-forecast rows of the sample submission and reshape them to
# the (unique_id, ds, y) layout, encoding 2018/2019 as periods 8/9.
test = pd.read_csv("/kaggle/input/shellai-data/dataset/sample_submission.csv")
test = (
    test.loc[test['data_type'] == "biomass_forecast"]
    .drop(columns=['destination_index', 'data_type'])
    .reset_index(drop=True)
    .rename(columns={'year': 'ds', 'source_index': 'unique_id', 'value': 'y'})
)
test['ds'] = test['ds'].replace({2018: 8, 2019: 9})

# Attach each site's geometry, then drop the redundant 'Index' join key.
test = (
    test[['unique_id', 'ds', 'y']]
    .merge(data[['Index', 'geometry']], left_on='unique_id', right_on='Index', how='left')
    .drop(columns=['Index'])
)

# Distinct periods present in the frame.
variables = test['ds'].unique()

# For each period, build Queen weights over that period's rows and store the
# spatial lag of `y` in `y_lagged`.
for variable in variables:
    mask = test['ds'] == variable
    w = ps.weights.Queen.from_dataframe(test.loc[mask], geom_col='geometry')
    lagged_values = ps.weights.spatial_lag.lag_spatial(w, test.loc[mask, 'y'].values)
    test.loc[mask, ["y_lagged"]] = lagged_values
test = test.drop(columns=['geometry'])
test.head()

In [None]:
# Rows of `test` for period 8.
# NOTE: this rebinds `weights`, shadowing the `pysal.lib.weights` module imported at
# the top of the notebook.
weights = test.loc[test['ds'] == 8]
weights.head()

In [None]:
# Forecast the future periods and attach `y` from `test`. Those values come from
# sample_submission.csv, so the MAE below is measured against that file's values.
p = (
    model.predict(futr_df=test)
    .reset_index()
    .merge(test[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')
)

loss = mean_absolute_error(p['y'], p['TFT'])
print(loss)

In [None]:
# Turn the TFT forecasts into submission columns: decode periods 8/9 back to
# 2018/2019, rename to the submission headers and tag every row as a biomass forecast.
submission = (
    p.assign(ds=p['ds'].replace({8: 2018, 9: 2019}))
    .loc[:, ["unique_id", "ds", "TFT"]]
    .rename(columns={'ds': 'year', 'unique_id': 'source_index', 'TFT': 'value'})
    .assign(data_type="biomass_forecast")
)
submission.head()
  

In [None]:
# Write the submission without the DataFrame index.
submission.to_csv("/kaggle/working/submission.csv", index=False)

In [None]:
from ray import tune

from neuralforecast.auto import AutoTFT
from neuralforecast.core import NeuralForecast
from neuralforecast.losses.pytorch import MAE

import logging
# Only show warnings and above from PyTorch Lightning.
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)

# AutoTFT search with the library's default config: 4 sampled configurations,
# MAE loss, two-step horizon.
horizon = 2
models = [
    AutoTFT(
        h=horizon,
        loss=MAE(),
        config=None,
        num_samples=4,
        cpus=4,
        verbose=True,
    )
]
nf = NeuralForecast(models=models, freq='Y')

nf.fit(df=melted_combined)


In [None]:
from neuralforecast import NeuralForecast
from neuralforecast.models import Informer
from neuralforecast.losses.pytorch import MQLoss, DistributionLoss, MAE
# Informer with the spatial lag as a future exogenous feature, validated on the
# last training period and scored on the held-out period.
models = [
    Informer(
        h=h,
        input_size=3,
        futr_exog_list=['y_lagged'],
        # network size
        hidden_size=8,
        conv_hidden_size=32,
        n_head=4,
        # objective and input scaling
        loss=MAE(),
        scaler_type='robust',
        # optimisation schedule
        learning_rate=1e-3,
        max_steps=500,
        val_check_steps=50,
        early_stop_patience_steps=2,
    )
]
model = NeuralForecast(models=models, freq='Y')
model.fit(train, val_size=1)

p = (
    model.predict(futr_df=valid)
    .reset_index()
    .merge(valid[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')
)

loss = mean_absolute_error(p['y'], p['Informer'])
print(loss)

In [None]:
!pip install pulp

In [None]:
# Count how many of the largest period-8 `y` values are needed to reach 80% of
# their total.
target_percentage = 0.8

sorted_series = test.loc[test['ds'] == 8, 'y'].sort_values(ascending=False)
total_sum = test.loc[test['ds'] == 8, 'y'].sum()

current_sum = 0
n = 0

# Walk the values from largest to smallest until the running total crosses the target.
for n, value in enumerate(sorted_series, start=1):
    current_sum += value
    if current_sum >= target_percentage * total_sum:
        break

print("Number of values (n):", n)
# NOTE(review): 1003 is hard-coded here — presumably the n printed above on an
# earlier run; confirm it still matches.
test.loc[test['ds'] == 8, 'y'].nlargest(1003).index


In [None]:
import pulp
locations = data.geometry
# Candidates: row labels of the 1003 largest period-8 `y` values in `test`;
# `weights` holds those values under the same labels.
# NOTE(review): the labels are used directly as row/column positions in `distance` —
# confirm they coincide with the site indices.
depot_candidates = test.loc[test['ds'] == 8]['y'].nlargest(1003).index
weights = test.loc[test['ds'] == 8]['y'].nlargest(1003)

model = pulp.LpProblem("CFLModel", pulp.LpMinimize)

# One binary "facility open" variable per candidate.
f_vec = pulp.LpVariable.dict("d_vec", list(depot_candidates), cat="Binary")

# One binary assignment variable per (site, facility) pair.
if_mat = pulp.LpVariable.dicts(
    "if_mat",
    [(src, dst) for src in depot_candidates for dst in depot_candidates],
    cat="Binary",
)

# Objective: total distance from each site to its assigned facility, weighted by
# the site's value.
model += pulp.lpSum(
    [
        distance[src][dst] * weights[src] * if_mat[(src, dst)]
        for src in depot_candidates
        for dst in depot_candidates
    ]
)

# Every site is assigned to exactly one facility.
for src in depot_candidates:
    model += pulp.lpSum(if_mat[(src, dst)] for dst in depot_candidates) == 1

# A site can only be assigned to a facility that is open.
for src in depot_candidates:
    for dst in depot_candidates:
        model += if_mat[(src, dst)] <= f_vec[dst]

# Exactly 25 facilities are opened.
model += pulp.lpSum(f_vec[dst] for dst in depot_candidates) == 25

# The weight assigned to any facility may not exceed 20000.
for dst in depot_candidates:
    model += (
        pulp.lpSum(if_mat[(src, dst)] * weights[src] for src in depot_candidates)
        <= 20000
    )


In [None]:

# Solve with PuLP's default solver, then report the status and objective value.
model.solve()

status_text = pulp.LpStatus[model.status].lower()
objective_value = np.round(pulp.value(model.objective), 2)
print("Solving the model results in **{}** status and objective function of {}.".format(
    status_text, objective_value))

In [None]:
# Opened facilities (variable value 1), used as the index of an empty frame.
fac_loc = pd.DataFrame(index=[j for j in depot_candidates if f_vec[j].varValue == 1])
# Facility chosen for each candidate site, indexed by site.
cust_loc = pd.DataFrame(
    [
        {'fac_id': j}
        for i in depot_candidates
        for j in depot_candidates
        if if_mat[(i, j)].varValue == 1
    ],
    index=depot_candidates,
)

for facility_id in fac_loc.index:
    print(facility_id)

In [None]:
!pip install pyomo

In [None]:
# Import only the Pyomo names used below instead of a star import, which floods
# the notebook namespace.
from pyomo.environ import (
    Binary,
    ConcreteModel,
    Constraint,
    ConstraintList,
    Objective,
    SolverFactory,
    Var,
    minimize,
    value,
)

locations = data.geometry
# Candidate sites as a plain Python list, so it can serve as a Pyomo index set.
# NOTE(review): nlargest(200) is taken over the `Index` column itself, i.e. the 200
# highest site indices rather than the 200 highest biomass values — confirm intent.
depot_candidates = data.Index.nlargest(200).tolist()
# Per-row mean of columns 3..10 of `data`.
weights = data.iloc[:, 3:11].mean(axis=1)
high_biomass_threshold = 204  # Adjust this threshold as needed
high_biomass_candidates = [j for j in depot_candidates if weights[j] > high_biomass_threshold]
print(len(high_biomass_candidates))

# Create a Pyomo ConcreteModel
model = ConcreteModel()

# One binary "facility open" variable per high-biomass candidate.
model.d_vec = Var(high_biomass_candidates, domain=Binary)

# One binary assignment variable per (site, facility) pair.
model.if_mat = Var(depot_candidates, high_biomass_candidates, domain=Binary)

# Objective: distance from each site to its assigned facility, weighted by the
# site's mean value.
model.obj = Objective(
    expr=sum(distance[i][j] * weights[i] * model.if_mat[i, j]
             for i in depot_candidates for j in high_biomass_candidates),
    sense=minimize
)

# Every site is assigned to exactly one facility.
model.serve_one = ConstraintList()
for i in depot_candidates:
    model.serve_one.add(
        sum(model.if_mat[i, j] for j in high_biomass_candidates) == 1
    )

# A site can only be assigned to a facility that is open.
model.incident_to_facility = ConstraintList()
for i in depot_candidates:
    for j in high_biomass_candidates:
        model.incident_to_facility.add(
            model.if_mat[i, j] <= model.d_vec[j]
        )

# Exactly 25 facilities are opened.
model.facility_limit = Constraint(
    expr=sum(model.d_vec[j] for j in high_biomass_candidates) == 25
)

# The weight assigned to any facility may not exceed 20000.
model.capacity_limit = ConstraintList()
for j in high_biomass_candidates:
    model.capacity_limit.add(
        sum(model.if_mat[i, j] * weights[i] for i in depot_candidates) <= 20000
    )

# Solve with CBC under a 10-minute limit. 'seconds' is CBC's time-limit option;
# 'tmlim' is the GLPK spelling.
solver = SolverFactory('cbc')
solver.options['seconds'] = 60*10
results = solver.solve(model)

# Print results
print("Solver Status:", results.solver.status)
print("Objective Value:", value(model.obj))


In [None]:
!pip install gurobipy

In [None]:
import pulp
import gurobipy as gp
from gurobipy import GRB

locations = data.geometry
# Candidate sites: the 100 largest values of the `Index` column.
depot_candidates = data.Index.nlargest(100)
# Site weights: the period-8 `y` values of `test`, selected explicitly rather than
# read from the global `weights`, which other cells rebind to objects without a
# 'y' entry.
weight = test.loc[test['ds'] == 8, 'y']

# Create a Gurobi model
model = gp.Model("CFLModel")

# One binary "facility open" variable per candidate.
f_vec = {j: model.addVar(vtype=GRB.BINARY, name=f"d_vec_{j}") for j in depot_candidates}

# One binary assignment variable per (site, facility) pair.
if_mat = {(i, j): model.addVar(vtype=GRB.BINARY, name=f"if_mat_{i}_{j}")
          for i in depot_candidates for j in depot_candidates}

# Objective: distance from each site to its assigned facility, weighted by the
# site's value.
model.setObjective(
    gp.quicksum(distance[i][j] * weight[i] * if_mat[(i, j)]
                for i in depot_candidates for j in depot_candidates),
    GRB.MINIMIZE
)

# Every site is assigned to exactly one facility.
for i in depot_candidates:
    model.addConstr(
        gp.quicksum(if_mat[(i, j)] for j in depot_candidates) == 1,
        name=f"serve_one_{i}"
    )

# A site can only be assigned to a facility that is open.
for i in depot_candidates:
    for j in depot_candidates:
        model.addConstr(
            if_mat[(i, j)] <= f_vec[j],
            name=f"incident_to_facility_{i}_{j}"
        )

# Exactly 25 facilities are opened.
model.addConstr(
    gp.quicksum(f_vec[j] for j in depot_candidates) == 25,
    name="facility_limit"
)

# The weight assigned to any facility may not exceed 20000.
for j in depot_candidates:
    model.addConstr(
        gp.quicksum(if_mat[(i, j)] * weight[i] for i in depot_candidates) <= 20000,
        name=f"capacity_limit_{j}"
    )
# model.setParam('TimeLimit', 10)
# Optimize the model
model.optimize()

# Print the status and, when optimal, every variable value and the objective.
print("Status:", model.Status)
if model.Status == GRB.OPTIMAL:
    for var in model.getVars():
        print(var.VarName, var.x)
    print("Objective:", model.ObjVal)


In [None]:
# First five rows of the transposed table built from column 3 onward.
data.iloc[:, 3:].T.head()

In [None]:
# Yearly values as an array plus the matching year labels.
y = data.iloc[:, 3:11].values
x = ["2010","2011","2012","2013","2014","2015","2016","2017"]
# Line plot of five sites (positions 209..213) across the years. Only columns 3..10
# are selected: slicing from 3 to the end would also pull in the geometry and
# *_lagged columns added earlier in the notebook.
sns.lineplot(data=data.iloc[:, 3:11].T.iloc[:, 0+209:5+209])

In [None]:
# Lag-7 plot over the year columns only (positions 3..10); slicing to the end would
# also include the geometry and *_lagged columns added earlier in the notebook.
pd.plotting.lag_plot(data.iloc[:, 3:11].T, lag=7)

In [None]:
# Yearly series of the single site at row position 140, restricted to the year
# columns (positions 3..10) so no geometry or *_lagged entries end up in the series.
values = pd.DataFrame(data.iloc[100+40:101+40, 3:11].T.values)
# Side-by-side copies of the series shifted by 3, 2, 1 and 0 steps.
dataframe = pd.concat([values.shift(3), values.shift(2),
                       values.shift(1), values], axis=1)
# Name the columns after their lag so the correlation table is readable.
dataframe.columns = ['t-3', 't-2', 't-1', 't']

# Pairwise correlation between the lagged copies.
result = dataframe.corr()

print(result)

In [None]:
import matplotlib.pyplot as plt
# Work on the first 11 columns of `data`.
biowaste_df = data.iloc[:, :11]

# Transpose the DataFrame and set the index to Latitude and Longitude
# NOTE(review): set_index(..., inplace=True) acts on a slice of `data` — this may
# raise a SettingWithCopyWarning; confirm.
biowaste_df.set_index(['Latitude', 'Longitude'], inplace=True)
# After the transpose the former column labels become rows (exposed by reset_index
# as 'index') and the (Latitude, Longitude) pairs become the columns.
biowaste_df = biowaste_df.T.reset_index()

# Melt the DataFrame to have a 'Year' column and a 'Biowaste Value' column
# NOTE(review): 'index' (renamed to 'Coordinate' below) holds the former column
# labels, while var_name='Year' is filled from the transposed columns, i.e. the
# coordinates — the two labels look swapped; confirm.
biowaste_df = biowaste_df.melt(id_vars='index', var_name='Year', value_name='Biowaste Value')
biowaste_df.rename(columns={'index': 'Coordinate'}, inplace=True)

# Plot the data using Seaborn
plt.figure(figsize=(10, 6))
sns.lineplot(x='Year', y='Biowaste Value', hue='Coordinate', data=biowaste_df, marker='o')
plt.xlabel('Year')
plt.ylabel('Biowaste Value')
plt.title('Biowaste Values over Years for Different Coordinates')
# Place the legend outside the axes, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True)
plt.show()


In [None]:
# Number of missing values per column.
data.isna().sum()

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale columns 3..10 of `data` and write the result back in place, so
# every later cell sees the scaled values.
scaler = MinMaxScaler()
data.iloc[:, 3:11] = scaler.fit_transform(data.iloc[:, 3:11])

data.head()

In [None]:
# OLS baseline: regress the 2017 values on the 2010-2016 values.
# `weights` is rebound to DataFrame/Series objects earlier in the notebook, so the
# KNN builder is reached through a locally imported alias of the pysal module.
from pysal.lib import weights as spatial_weights

# Row-standardised 8-nearest-neighbour weights.
# NOTE(review): `w` is built here but not passed to spreg.OLS — confirm whether
# spatial diagnostics were intended.
w = spatial_weights.KNN.from_dataframe(data, k=8)
w.transform = 'R'
# Regressors are columns 3..9 (2010-2016). Column 10 is the 2017 target itself, so
# including it would leak the target and leave 8 regressors for the 7 names below.
model1 = spreg.OLS(data[['2017']].values, data.iloc[:, 3:10].values,
                   name_y='2017_pred',
                   name_x=["2010", "2011", "2012", "2013", "2014", "2015", "2016"])
# preds = scaler.inverse_transform(model1.predy)

print(model1.summary)


In [None]:
from sklearn.metrics import mean_squared_error as mse

# Mean squared error of the OLS fitted values against the 2017 column, kept in a
# Series keyed by model name.
mses = pd.Series({'OLS': mse(data["2017"], model1.predy.ravel())})
print(mses)