# Machine Learning Model Framework - SVM and Random Forest Classification

In [5]:
from math import ceil

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [6]:
# Load the reforestation dataset.
# Example file schema: {county, state, country, land_cost, etc.}
data = pd.read_csv(filepath_or_buffer='reforestation_class.csv')

# Preview the first five rows.
data.head(n=5)

Unnamed: 0,State,Abbreviation,County,Population,Temperature,Precipitation,Palmer-Z,Land-Prices,Population_Class,Temperature_Class,Precipitation_Class,Palmer-Z_Class,Land-Prices_Class,percentile_rank,Final_Class
0,Alabama,AL,Autauga,59759,66.1,51.89,-0.26,3400.0,2,5,3,5,5.0,0.0,0
1,Alabama,AL,Baldwin,246435,70.1,50.71,-0.84,3400.0,1,5,3,2,5.0,0.0,0
2,Alabama,AL,Barbour,24706,66.4,48.95,-0.22,3400.0,4,5,3,5,5.0,0.0,0
3,Alabama,AL,Bibb,22005,64.4,53.22,-0.29,3400.0,4,6,1,5,5.0,0.0,0
4,Alabama,AL,Blount,59512,62.5,60.68,0.24,3400.0,2,6,1,7,5.0,0.0,0


In [10]:
# Keep only complete rows: drop every row that has at least one missing value.
data = data.dropna(axis=0, how='any')

In [11]:
# Per-factor class columns that are averaged into a single score per row.
class_columns = [
    'Population_Class',
    'Temperature_Class',
    'Precipitation_Class',
    'Palmer-Z_Class',
    'Land-Prices_Class',
]
n_classes = 7  # number of final classes; valid labels are 1..n_classes

# Row-wise mean of the per-factor class scores.
avg_scores = data[class_columns].mean(axis=1)

# Percentile rank of each row's mean score, rounded to two decimals.
data['percentile_rank'] = avg_scores.rank(pct=True).round(2)

# Bin the percentile rank into n_classes equal-width classes.
# Rounding can turn the smallest ranks (< 0.005) into 0.00, for which
# ceil(0 * n_classes) would yield an out-of-range class 0, so the result is
# clipped to the valid 1..n_classes range.
data['Final_Class'] = (
    (data['percentile_rank'] * n_classes)
    .map(ceil)
    .clip(lower=1, upper=n_classes)
)

data.head()

Unnamed: 0,State,Abbreviation,County,Population,Temperature,Precipitation,Palmer-Z,Land-Prices,Population_Class,Temperature_Class,Precipitation_Class,Palmer-Z_Class,Land-Prices_Class,percentile_rank,Final_Class
0,Alabama,AL,Autauga,59759,66.1,51.89,-0.26,3400.0,2,5,3,5,5.0,0.48,4
1,Alabama,AL,Baldwin,246435,70.1,50.71,-0.84,3400.0,1,5,3,2,5.0,0.18,2
2,Alabama,AL,Barbour,24706,66.4,48.95,-0.22,3400.0,4,5,3,5,5.0,0.68,5
3,Alabama,AL,Bibb,22005,64.4,53.22,-0.29,3400.0,4,6,1,5,5.0,0.58,5
4,Alabama,AL,Blount,59512,62.5,60.68,0.24,3400.0,2,6,1,7,5.0,0.58,5


In [12]:
# Model inputs are the five per-factor class columns; the target is Final_Class.
# NOTE(review): Final_Class is computed from the mean of these same columns, so
# the accuracy below measures how well the model recovers that derived rule.
features = ['Population_Class', 'Temperature_Class', 'Precipitation_Class', 'Palmer-Z_Class', 'Land-Prices_Class']
X = data[features]
y = data['Final_Class']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply it to both splits, so
# no statistics from the test rows leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel SVM with C=0.1, trained on the standardized features.
clf_svm = SVC(kernel='linear', C=0.1)

clf_svm.fit(X_train_scaled, y_train)

y_pred = clf_svm.predict(X_test_scaled)

print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.9794303797468354


In [13]:
# Random forest (100 trees, fixed seed) trained on the unscaled features.
clf_random = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict on X_test and report accuracy against y_test.
y_pred = clf_random.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.7863924050632911


In [14]:
# Persist the frame without the index column.
# NOTE(review): this writes to the same path the notebook reads its input
# from, so the original file is overwritten.
data.to_csv(path_or_buf='reforestation_class.csv', index=False)