In [None]:
import pandas as pd

# Download link for the fire/weather CSV hosted on GitHub; the file name
# suggests 2014 US fire records joined with weather data (not verified here).
url = (
    'https://github.com/ZhuofeiL/Wild-fire/blob/main/'
    'us_fire_2014_with_weather.csv?raw=true'
)
df = pd.read_csv(url)

# Quick sanity check: show the first rows as plain text.
print(df.head())


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the fire/weather CSV from GitHub.
df = pd.read_csv('https://github.com/ZhuofeiL/Wild-fire/blob/main/us_fire_2014_with_weather.csv?raw=true')

# Parse the date column and split it into calendar parts used for plotting.
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

print("Size of the dataset: ", df.shape)

# Record counts per month, with one bar per state inside each month.
plt.figure(figsize=(15, 7))
sns.countplot(x='month', hue='STATE', data=df)
plt.title('Number of Fires per State Over Time')
plt.legend(loc='upper right', title='State')
plt.xticks(rotation=45)
plt.show()

# Correlation heatmap over the numeric columns only.
# The string selector 'number' is used instead of np.number because this cell
# does not import numpy; np.number raised NameError on a fresh kernel.
numeric_df = df.select_dtypes(include='number')
sns.heatmap(numeric_df.corr(), annot=True)
plt.show()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Load the fire/weather CSV from GitHub.
df = pd.read_csv('https://github.com/ZhuofeiL/Wild-fire/blob/main/us_fire_2014_with_weather.csv?raw=true')

# Derive the calendar month of every record from its date.
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month

# Build one row per (STATE, month): record count plus mean weather readings.
grouped = df.groupby(['STATE', 'month'])
fires_per_month_state = grouped.size().reset_index(name='fire_count')
weather_means = {
    'avg_temperature': 'temperature(C)',
    'avg_east_wind': 'east_wind',
    'avg_north_wind': 'north_wind',
    'avg_humidity': 'Humidity',
}
for avg_column, raw_column in weather_means.items():
    # .values assigns positionally, so this relies on size() and mean()
    # returning the groups in the same order.
    fires_per_month_state[avg_column] = grouped[raw_column].mean().values

# Features: the state label plus the numeric month/weather aggregates.
numeric_columns = ['month', 'avg_temperature', 'avg_east_wind', 'avg_north_wind', 'avg_humidity']
feature_columns = ['STATE'] + numeric_columns
X = fires_per_month_state[feature_columns]
y = fires_per_month_state['fire_count']

# One-hot encode the state (unknown states are ignored at predict time) and
# standardise the numeric columns.
ct = ColumnTransformer(
    transformers=[
        ('state_encoder', OneHotEncoder(handle_unknown='ignore'), ['STATE']),
        ('numerical_scaler', StandardScaler(), numeric_columns),
    ],
    remainder='passthrough',
)

# Hold out 20% of the (STATE, month) rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing and the random-forest regressor are fitted together.
pipeline = Pipeline(steps=[
    ('preprocessor', ct),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
])
pipeline.fit(X_train, y_train)

# Report error metrics on the held-out rows.
y_pred = pipeline.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R^2 Score:", r2_score(y_test, y_pred))

def predict_fire_count(state, month, avg_temperature, avg_east_wind, avg_north_wind, avg_humidity):
    """Predict the fire count for a single set of feature values.

    Builds a one-row DataFrame with the columns 'STATE', 'month',
    'avg_temperature', 'avg_east_wind', 'avg_north_wind' and 'avg_humidity',
    then passes it to the notebook-level ``pipeline``.

    Parameters:
        state: value for the 'STATE' column.
        month: value for the 'month' column.
        avg_temperature: value for the 'avg_temperature' column.
        avg_east_wind: value for the 'avg_east_wind' column.
        avg_north_wind: value for the 'avg_north_wind' column.
        avg_humidity: value for the 'avg_humidity' column.

    Returns:
        The first (and only) element of ``pipeline.predict`` for that row.
    """
    column_names = ['STATE', 'month', 'avg_temperature', 'avg_east_wind', 'avg_north_wind', 'avg_humidity']
    single_row = [state, month, avg_temperature, avg_east_wind, avg_north_wind, avg_humidity]
    query = pd.DataFrame([single_row], columns=column_names)
    predictions = pipeline.predict(query)
    return predictions[0]


In [None]:
# Example query against the fitted regression pipeline; arguments are spelled
# out by keyword so each value's meaning is visible at the call site.
predicted_fire_count = predict_fire_count(
    state='TX',
    month=5,
    avg_temperature=26.0,
    avg_east_wind=2.0,
    avg_north_wind=-1.0,
    avg_humidity=0.2,
)
print("Predicted Fire Count:", predicted_fire_count)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Load the fire/weather CSV; every row in it is labelled as a positive example below.
df = pd.read_csv('https://github.com/ZhuofeiL/Wild-fire/blob/main/us_fire_2014_with_weather.csv?raw=true')
# Integer division by 100 drops the last two digits
# (assumes DISCOVERY_TIME is HHMM-encoded — TODO confirm).
df['DISCOVERY_HOUR'] = df['DISCOVERY_TIME'] // 100
df['date'] = pd.to_datetime(df['date'])
df['MONTH'] = df['date'].dt.month

df['FIRE_OCCURRED'] = 1

# Seeded generator so the synthetic negatives, and therefore the reported
# accuracy and predictions, are the same on every Restart & Run All.
rng = np.random.default_rng(42)
n_negative = len(df) * 10

# Synthetic "no fire" rows: each column is sampled independently from the
# values observed in the fire records, ten negatives per positive.
# NOTE(review): independent sampling means a negative row is not a real
# observation; confirm this is the intended notion of "no fire".
negative_examples = pd.DataFrame({
    'STATE': rng.choice(df['STATE'].unique(), n_negative),
    'temperature(C)': rng.choice(df['temperature(C)'], n_negative),
    'MONTH': rng.choice(df['MONTH'].unique(), n_negative),
    'east_wind': rng.choice(df['east_wind'], n_negative),
    'north_wind': rng.choice(df['north_wind'], n_negative),
    'Humidity': rng.choice(df['Humidity'], n_negative),
    'LATITUDE': rng.choice(df['LATITUDE'], n_negative),
    'LONGITUDE': rng.choice(df['LONGITUDE'], n_negative),
    'FIRE_OCCURRED': 0
})
df = pd.concat([df, negative_examples], ignore_index=True)

features = ['STATE', 'temperature(C)', 'MONTH', 'east_wind', 'north_wind', 'Humidity', 'LATITUDE', 'LONGITUDE']
X = df[features]
y = df['FIRE_OCCURRED']

# handle_unknown='ignore' matches the regression pipeline's state encoder and
# keeps prediction from raising on a state that was absent from training.
preprocessor = ColumnTransformer(
    transformers=[
        ('one_hot', OneHotEncoder(handle_unknown='ignore'), ['STATE']),
        ('scale', StandardScaler(), ['temperature(C)', 'MONTH', 'east_wind', 'north_wind', 'Humidity', 'LATITUDE', 'LONGITUDE'])
    ])

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Hold out 20% of the combined positive/negative rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

def predict_fire_occurrence(state, temperature, month, east_wind, north_wind, humidity, latitude, longitude):
    """Return the classifier's probability for one set of feature values.

    Builds a one-row DataFrame with the columns 'STATE', 'temperature(C)',
    'MONTH', 'east_wind', 'north_wind', 'Humidity', 'LATITUDE' and
    'LONGITUDE', then queries the notebook-level ``pipeline``.

    Parameters:
        state: value for the 'STATE' column.
        temperature: value for the 'temperature(C)' column.
        month: value for the 'MONTH' column.
        east_wind: value for the 'east_wind' column.
        north_wind: value for the 'north_wind' column.
        humidity: value for the 'Humidity' column.
        latitude: value for the 'LATITUDE' column.
        longitude: value for the 'LONGITUDE' column.

    Returns:
        The second column of ``pipeline.predict_proba`` for that row
        (presumably the FIRE_OCCURRED == 1 class when the pipeline was
        fitted on 0/1 labels — verify against the training cell).
    """
    feature_values = {
        'STATE': state,
        'temperature(C)': temperature,
        'MONTH': month,
        'east_wind': east_wind,
        'north_wind': north_wind,
        'Humidity': humidity,
        'LATITUDE': latitude,
        'LONGITUDE': longitude,
    }
    # Wrap each scalar in a one-element list so the frame has exactly one row.
    input_data = pd.DataFrame({name: [value] for name, value in feature_values.items()})
    class_probabilities = pipeline.predict_proba(input_data)
    first_row = class_probabilities[0]
    return first_row[1]

# Example query against the fitted classifier; arguments are spelled out by
# keyword so each value's meaning is visible at the call site.
probability = predict_fire_occurrence(
    state='CA',
    temperature=30,
    month=7,
    east_wind=5,
    north_wind=3,
    humidity=20,
    latitude=38.5,
    longitude=-120.5,
)
print(f'Probability of Fire Occurrence: {probability}')


In [None]:
# `predict_fire_risk` is not defined anywhere in this notebook, so the
# original call raised NameError. The argument list matches
# predict_fire_occurrence(state, temperature, month, east_wind, north_wind,
# humidity, latitude, longitude), so that function is called instead.
risk = predict_fire_occurrence('CA', 30, 7, 5, 3, 20, 38.5, -120.5)
print(f'Predicted Fire Risk: {risk}')