### Importing Libraries & Loading the Dataset

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Read the cleaned flower-bloom CSV (path is relative to the notebook's
# working directory) into the frame used by the rest of the notebook.
DATA_PATH = "flowerBlooms_Clean.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first five rows as the cell's output.
df.head(n=5)


Unnamed: 0,Site,Type,Season,Area,GeometryType,Coordinates
0,Chino Hills,Wild,Spring,25382.057,MultiPolygon,"[[[[-117.68286416250682, 33.87223450781116], [..."
1,Carrizo Plain National Monument,Wild,Spring,354751.514,MultiPolygon,"[[[[-119.50857432459836, 34.87043932367128], [..."
2,Antelope Valley California Poppy Reserve,Wild,Spring,25182.333,MultiPolygon,"[[[[-118.50266751919736, 34.69779051559962], [..."
3,Red Hills,Plantation,Summer,14874.759,MultiPolygon,"[[[[-120.239271, 33.427726]]]]"
4,Golden Valley,Wild,Winter,364929.893,MultiPolygon,"[[[[-116.766138, 36.991884]]]]"


### Dataset Overview & Feature Encoding

In [10]:
# Print a structural summary of the frame (row count, dtypes, non-null counts).
# DataFrame.info() writes the report itself and returns None, so it is called
# directly: wrapping it in print() emits a stray "None" line after the report.
print("Dataset Info:")
df.info()

# Seasons get an explicit calendar-order code. Series.map() yields NaN for any
# label that is not a key of the mapping, so unexpected labels are rejected
# here with a clear message instead of surfacing later as NaN features.
# Missing (NaN) seasons are not checked and still map to NaN.
season_map = {'Spring': 1, 'Summer': 2, 'Fall': 3, 'Winter': 4}
unmapped_seasons = [
    label for label in df['Season'].dropna().unique() if label not in season_map
]
if unmapped_seasons:
    raise ValueError(f"Season values missing from season_map: {unmapped_seasons}")
df['Season_num'] = df['Season'].map(season_map)

# The remaining text columns are converted to pandas category codes, one
# integer per distinct value.
# NOTE(review): these are nominal columns, so the numeric order of the codes
# carries no meaning, yet a linear model will treat it as ordinal. Consider
# one-hot encoding, and confirm that 'Site' is not effectively a per-row id.
for column in ('Site', 'Type', 'GeometryType'):
    df[f'{column}_num'] = df[column].astype('category').cat.codes

# Feature matrix (encoded columns only) and regression target (bloom area).
X = df[['Season_num', 'Site_num', 'Type_num', 'GeometryType_num']]
y = df['Area']


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Site          150 non-null    object 
 1   Type          150 non-null    object 
 2   Season        150 non-null    object 
 3   Area          150 non-null    float64
 4   GeometryType  150 non-null    object 
 5   Coordinates   150 non-null    object 
dtypes: float64(1), object(5)
memory usage: 7.2+ KB
None


### Model Training, Prediction & Evaluation

In [3]:
# Train/test split: 20% of the rows are held out for evaluation, and the fixed
# seed makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Regressors to compare, keyed by the label shown in the results table.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(random_state=42, n_estimators=100),
    "XGBoost": XGBRegressor(random_state=42, learning_rate=0.1, n_estimators=100),
}

# Fit every regressor on the training rows and score it on the held-out rows.
results = []
for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)

    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    results.append(dict(zip(("Model", "MSE", "R²"), (name, mse, r2))))

# One row per model; the cell output lists them from highest to lowest R².
# An R² below 0 means the model scored worse than always predicting the mean
# of y_test.
results_df = pd.DataFrame(results)
results_df.sort_values("R²", ascending=False)


Unnamed: 0,Model,MSE,R²
0,Linear Regression,16585290000.0,-0.030954
1,Random Forest,16607500000.0,-0.032335
2,XGBoost,19612030000.0,-0.219099
