In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Load the raw strike reports from the CSV next to the notebook
airstrike = pd.read_csv(filepath_or_buffer="STRIKE_REPORTS.csv")

In [None]:
# Quick look at the first five rows of the loaded frame
print(airstrike.head(5))

In [None]:
# Count missing values per column and keep only the columns that have any
null_counts = airstrike.isnull().sum()
NAs = pd.concat([null_counts], keys=["Airstrike"], axis=1)
has_missing = NAs.sum(axis=1) > 0
missing_cols = NAs[has_missing]
print(missing_cols)

In [None]:
# Drop only the rows that are missing a value the model actually uses.
# A bare dropna() discards every row with a NaN in *any* column of the CSV,
# including columns that are never read afterwards, which can throw away
# most of the data for no benefit.
# NOTE: keep this list in sync with the feature/target columns selected below.
airstrike = airstrike.dropna(
    subset=['AIRCRAFT', 'SPECIES', 'PHASE_OF_FLIGHT', 'WEATHER', 'DAMAGE_LEVEL']
)

In [None]:
# Categorical predictor columns and the target column used by the model
features = [
    'AIRCRAFT',
    'SPECIES',
    'PHASE_OF_FLIGHT',
    'WEATHER',
]
X = airstrike.loc[:, features]
y = airstrike.loc[:, 'DAMAGE_LEVEL']

In [None]:
# One-hot encode the selected predictor columns
X = pd.get_dummies(data=X)

In [None]:
# Hold out 25% of the rows for evaluation; fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.25,
)

In [None]:
# Random forest with 100 trees and a fixed seed
rf_classifier = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)

In [None]:
# Train the forest on the training split
rf_classifier.fit(X=X_train, y=y_train)

In [None]:
# Predict the target label for every held-out row
predictions = rf_classifier.predict(X=X_test)

In [None]:
# Score the held-out predictions: overall accuracy, confusion matrix, per-class report
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=predictions)
classification_rep = classification_report(y_true=y_test, y_pred=predictions)


In [None]:
# Print the accuracy first, then each remaining metric under its own heading
print(f'Accuracy: {accuracy:.2f}')
for heading, body in (
    ('\nConfusion Matrix:', conf_matrix),
    ('\nClassification Report:', classification_rep),
):
    print(heading)
    print(body)

In [None]:
# Pair every encoded column with the forest's importance score, highest first
feature_importances = rf_classifier.feature_importances_
feature_importance_df = (
    pd.DataFrame({'Feature': X.columns, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

In [None]:
# Plot only the highest-ranked features. After one-hot encoding there is one
# column per category value, so drawing a bar for every column makes the
# x-axis labels overlap and the chart unreadable.
TOP_N_FEATURES = 20
top_features = (
    feature_importance_df
    .sort_values(by='Importance', ascending=False)  # do not rely on prior ordering
    .head(TOP_N_FEATURES)
)

plt.figure(figsize=(10, 6))
plt.bar(top_features['Feature'], top_features['Importance'])
plt.xlabel('Feature')
plt.ylabel('Importance')
# len(top_features) is smaller than TOP_N_FEATURES when fewer columns exist
plt.title(f'Top {len(top_features)} Feature Importances')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()  # keep the rotated tick labels inside the figure
plt.show()

In [None]:
from sklearn.tree import export_text

In [None]:
# Dump the textual split rules of the first three trees in the forest
feature_labels = list(X.columns)
for tree_id in (0, 1, 2):
    estimator = rf_classifier.estimators_[tree_id]
    tree_rules = export_text(estimator, feature_names=feature_labels)
    print(f"Decision Tree {tree_id + 1}:\n{tree_rules}\n")


In [None]:
# Number of rows per target label, most frequent first
class_balance = airstrike['DAMAGE_LEVEL'].value_counts()
print("Class Balance:", class_balance, sep="\n")


In [None]:
# Candidate hyperparameter values for a random-forest grid search
param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 10, 20],
    # Add other hyperparameters to tune
)

In [None]:
# Grid search for hyperparameter tuning over param_grid (currently disabled; uncomment the lines below to run it)
# from sklearn.model_selection import GridSearchCV
# grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5)
# grid_search.fit(X_train, y_train)
# best_params = grid_search.best_params_
# print("Best Hyperparameters:", best_params)


In [None]:
# Rebuild and refit the forest with its hyperparameters spelled out.
# max_depth=None is scikit-learn's default, so these settings match the
# forest configured earlier in the notebook (100 trees, seed 42).
forest_params = {'n_estimators': 100, 'max_depth': None, 'random_state': 42}
rf_classifier = RandomForestClassifier(**forest_params)
rf_classifier.fit(X_train, y_train)

In [None]:
# 5-fold cross-validation of the forest on the training split
from sklearn.model_selection import cross_val_score
cross_val_scores = cross_val_score(
    estimator=rf_classifier,
    X=X_train,
    y=y_train,
    cv=5,
)
print("Cross-validation Scores:", cross_val_scores)


In [None]:
# Visualize Decision Tree
from sklearn.tree import export_graphviz
import graphviz

In [None]:
# Pick which member of the forest to draw (index 0 = the first tree)
TREE_INDEX = 0
tree_to_visualize = rf_classifier.estimators_[TREE_INDEX]

In [None]:
# Export the chosen tree as Graphviz DOT source (returned as a string because
# out_file=None).
# class_names must be listed in the same ascending order as the fitted
# forest's classes_. y.unique() returns labels in order of first appearance
# (and may include labels absent from the training split), which can attach
# the wrong class name to a leaf.
dot_data = export_graphviz(
    tree_to_visualize,
    out_file=None,
    feature_names=list(X.columns),
    class_names=[str(label) for label in rf_classifier.classes_],
    filled=True,
    rounded=True,
)

In [None]:
# Render the DOT source to the "decision_tree" output file once and open the
# result in the system's default viewer.
# Source.view() is a shortcut for render(view=True), so calling render() and
# then view() rendered the same file twice.
graph = graphviz.Source(dot_data)
graph.render("decision_tree", view=True)