In [20]:
from json import dump, load
from os import makedirs
from os.path import dirname

from pandas import DataFrame
from requests import get
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


## Fetch Data From API

In [22]:
def fetch_data_and_save_to_json(
    api_url="http://10.43.101.149/data?group_number=1",
    output_path="api/data/covertype.json",
    timeout=30,
):
    """Download one JSON payload from the data API and store it on disk.

    Args:
        api_url: Endpoint to query with an HTTP GET request.
        output_path: File the decoded JSON payload is written to. Missing
            parent directories are created.
        timeout: Seconds to wait for the server before giving up, so an
            unreachable host cannot hang the notebook cell forever.

    Returns:
        None. The outcome is reported with a printed message.

    Note:
        Only a non-200 status is handled here (reported via ``print``).
        Exceptions raised by ``get`` itself or by ``response.json()`` are
        not caught and propagate to the caller.
    """
    response = get(api_url, timeout=timeout)
    if response.status_code != 200:
        print("Error fetching data from API.")
        return

    data = response.json()

    # Create the target directory on first run instead of failing in open().
    output_dir = dirname(output_path)
    if output_dir:
        makedirs(output_dir, exist_ok=True)

    with open(output_path, "w") as outfile:
        dump(data, outfile)
    print("Data fetched from API and saved locally.")

In [5]:
# Download the dataset from the API and cache it in api/data/covertype.json,
# which the training cell below reads back.
fetch_data_and_save_to_json()

Data fetched from API and saved locally.


## Train Model

In [23]:
# Load the cached API payload; the rows are stored under its "data" key.
with open('api/data/covertype.json') as f:
    data = load(f)
df = DataFrame(data['data'])
# The JSON carries bare rows without headers, so the column names are assigned here.
column_names = ['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
                'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
                'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
                'Horizontal_Distance_To_Fire_Points', 'Wilderness_Area', 'Soil_Type',
                'Cover_Type']
df.columns = column_names

# Split the DataFrame into features (X) and target labels (y).
X = df.drop('Cover_Type', axis=1)
y = df['Cover_Type']

# Categorical columns that need One-Hot Encoding.
categorical_cols = ['Wilderness_Area', 'Soil_Type']

# Hold out the test rows BEFORE fitting any preprocessing, so the encoder only
# ever learns from training data (no information leaks from the test set).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode the categorical columns and pass the remaining ones through.
# handle_unknown='ignore' encodes a category that never appeared in the
# training rows as all zeros instead of raising at transform time.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(handle_unknown='ignore'), categorical_cols)],
    remainder='passthrough',
)
X_train = ct.fit_transform(X_train_raw)
X_test = ct.transform(X_test_raw)

# Encoded view of the full feature table, kept under its original name in case
# later cells rely on it. It is not used for fitting.
X_encoded = ct.transform(X)

# Train a RandomForestClassifier.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict on the held-out test set.
y_pred = rf_classifier.predict(X_test)

# Compute the model accuracy on the test set.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy}')



Precisión del modelo: 0.8958691910499139
