In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

## Prepare the Data

In [None]:
# Read the forest fires CSV (path is relative to the notebook's working directory).
df = pd.read_csv("Data/forestfires.csv")
print(df.shape)
# Preview only the first rows; the shape printed above already reports the full size.
df.head()

In [None]:
# Keep only the rows that have no missing values; rebind df to the filtered result.
df = df.dropna()

## Charts 

In [None]:
# monthDF is a copy of df whose 'month' abbreviations are replaced by integers
# (jan -> 1 ... dec -> 12) so months can be used as a numeric plotting axis.
month_names = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
month_dict = {name: number for number, name in enumerate(month_names, start=1)}

monthDF = df.copy()
monthDF['month'] = monthDF['month'].map(month_dict)

In [None]:
# Scatter plot: FFMC value of each row against its month number.
plt.scatter(x=monthDF["month"], y=monthDF["FFMC"], s=3)
plt.title('FFMC per Month')
plt.ylabel('FFMC')
plt.xlabel('Month')
plt.show()

In [None]:
# Scatter plot: burned area (in hectares) of each row against its month number.
plt.scatter(monthDF["month"], monthDF["area"], s=3)
plt.xlabel('Month')
plt.ylabel('Burned Area in ha')
# Title describes what is plotted: area on the y-axis, month on the x-axis.
plt.title('Burned Area per Month')
plt.show()

In [None]:
# Count the rows recorded for each month. The 'X' column is only used as
# something to count per group; the result is indexed by month number.
by_month = monthDF.groupby('month')['X'].count()

# Line plot of the per-month counts.
plt.plot(by_month.index, by_month.values)
plt.title('Fire Occurrences Over Time')
plt.ylabel('Number of Occurrences')
plt.xlabel('Month')
plt.show()

## Apply Logistic Regression

In [None]:
# One-hot encode the categorical 'month' and 'day' columns.
# get_dummies replaces the encoded columns with indicator columns, so running this
# cell a second time on the already-encoded frame would raise a KeyError. Encode only
# the columns that are still present so the cell can be re-executed safely.
categorical_columns = [col for col in ['month', 'day'] if col in df.columns]
if categorical_columns:
    df = pd.get_dummies(df, columns=categorical_columns)

In [None]:
# Binary target 'fire': 1 when the row's burned area is above the threshold, 0 otherwise.
# The threshold is a hard-coded cut-off, noted by the author as the mean of the area column.
threshold = 12.8
above_threshold = df['area'] > threshold
df['fire'] = above_threshold.astype(int)

# Features (X): four weather columns. Target (Y): the binary 'fire' column.
feature_columns = ['wind', 'temp', 'RH', 'rain']
X = df[feature_columns]
Y = df['fire']

In [None]:
# Fit a StandardScaler on the features and replace X with the scaled values.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train a logistic regression (default settings) on the scaled features against Y.
model = LogisticRegression().fit(X, Y)

# Show the fitted coefficients.
coefficients = model.coef_
print(coefficients)

## Apply SVM