# Loading and Preparing the Data

In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

# Step 1: Load the dataset
file_path = 'fertilizer.csv'  # Update with the correct file path
data = pd.read_csv(file_path)

# Step 2: Encode categorical variables
label_encoders = {}

# Categorical columns (update based on your dataset)
categorical_columns = ['Soil Type', 'Crop Type', 'Fertilizer Name']

# Apply Label Encoding to categorical columns
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Save the Label Encoders for later use
with open('label_encoders.pkl', 'wb') as le_file:
    pickle.dump(label_encoders, le_file)

# Step 3: Define features (X) and target (y)
X = data.drop(columns=['Fertilizer Name'])  # Features (without target)
y = data['Fertilizer Name']  # Target

# Step 4: Scale numerical features
scaler = StandardScaler()

# Identify numerical columns
numeric_columns = ['Temparature', 'Humidity ', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']

# Scale numerical columns
X[numeric_columns] = scaler.fit_transform(X[numeric_columns])

# Save the Scaler for future use
with open('fertilizer_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Step 5: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Initialize the RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Step 7: Train the model
model.fit(X_train, y_train)

# Step 8: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 9: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Step 10: Save the trained model
with open('fertilizer_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

print("Model and encoders saved successfully.")


Accuracy: 81.82%

Classification Report:
              precision    recall  f1-score   support

           1       0.86      1.00      0.92         6
           2       0.00      0.00      0.00         1
           3       1.00      0.50      0.67         4
           4       0.50      0.50      0.50         2
           5       0.83      1.00      0.91        15
           6       0.57      0.67      0.62         6
           7       1.00      0.80      0.89        10

    accuracy                           0.82        44
   macro avg       0.68      0.64      0.64        44
weighted avg       0.82      0.82      0.81        44

Model and encoders saved successfully.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os

# Load the dataset
file_path = 'fertilizer.csv'  # Make sure the file path is correct
data = pd.read_csv(file_path)

# Create the directory if it doesn't exist
output_dir = 'static/images'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Distribution of Temperature
plt.figure(figsize=(10, 6))
sns.histplot(data['Temparature'], bins=30, kde=True)
plt.title('Distribution of Temperature')
plt.xlabel('Temperature (°C)')
plt.ylabel('Frequency')
plt.savefig(f'{output_dir}/temperature_distribution.png')
plt.close()

# Distribution of Humidity
plt.figure(figsize=(10, 6))
sns.histplot(data['Humidity '], bins=30, kde=True)
plt.title('Distribution of Humidity')
plt.xlabel('Humidity (%)')
plt.ylabel('Frequency')
plt.savefig(f'{output_dir}/humidity_distribution.png')
plt.close()

# Distribution of Nitrogen Levels
plt.figure(figsize=(10, 6))
sns.histplot(data['Nitrogen'], bins=30, kde=True)
plt.title('Distribution of Nitrogen Levels')
plt.xlabel('Nitrogen (ppm)')
plt.ylabel('Frequency')
plt.savefig(f'{output_dir}/nitrogen_distribution.png')
plt.close()

# Select only numeric columns for the correlation heatmap
numeric_data = data.select_dtypes(include=[float, int])

# Correlation Heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)
plt.title('Correlation Heatmap')
plt.savefig(f'{output_dir}/correlation_heatmap_fertilizer.png')
plt.close()

# Bar chart of average Nitrogen levels by Soil Type
plt.figure(figsize=(12, 6))
average_nitrogen_by_soil = data.groupby('Soil Type')['Nitrogen'].mean().sort_values()
sns.barplot(x=average_nitrogen_by_soil.index, y=average_nitrogen_by_soil.values)
plt.title('Average Nitrogen Levels by Soil Type')
plt.xlabel('Soil Type')
plt.ylabel('Average Nitrogen (ppm)')
plt.xticks(rotation=45)
plt.savefig(f'{output_dir}/average_nitrogen_by_soil.png')
plt.close()

# Bar chart of average Potassium levels by Crop Type
plt.figure(figsize=(12, 6))
average_potassium_by_crop = data.groupby('Crop Type')['Potassium'].mean().sort_values()
sns.barplot(x=average_potassium_by_crop.index, y=average_potassium_by_crop.values)
plt.title('Average Potassium Levels by Crop Type')
plt.xlabel('Crop Type')
plt.ylabel('Average Potassium (ppm)')
plt.xticks(rotation=45)
plt.savefig(f'{output_dir}/average_potassium_by_crop.png')
plt.close()

# Pie chart of Fertilizer Distribution by Type
plt.figure(figsize=(12, 8))
fertilizer_distribution = data['Fertilizer Name'].value_counts()
plt.pie(fertilizer_distribution, labels=fertilizer_distribution.index, autopct='%1.1f%%', startangle=140)
plt.title('Distribution of Fertilizers by Type')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.savefig(f'{output_dir}/fertilizer_distribution.png')
plt.close()


In [7]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

# Step 1: Load the dataset
file_path = 'fertilizer.csv'  # Update with the correct file path
data = pd.read_csv(file_path)

# Step 2: Encode categorical variables
label_encoders = {}

# Categorical columns (update based on your dataset)
categorical_columns = ['Soil Type', 'Crop Type', 'Fertilizer Name']

# Apply Label Encoding to categorical columns
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Save the Label Encoders for later use
with open('samplelb.pkl', 'wb') as le_file:
    pickle.dump(label_encoders, le_file)

# Step 3: Define features (X) and target (y)
X = data.drop(columns=['Fertilizer Name'])  # Features (without target)
y = data['Fertilizer Name']  # Target

# Step 4: Scale numerical features
scaler = StandardScaler()

# Identify numerical columns
numeric_columns = ['Temparature', 'Humidity ', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']

# Scale numerical columns
X[numeric_columns] = scaler.fit_transform(X[numeric_columns])

# Save the Scaler for future use
with open('samplescaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Step 5: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Initialize and Train different Classification Models
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42),
    'SVM': SVC(kernel='linear', random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=42)
}

# Dictionary to store the accuracy of each model
model_accuracies = {}

# Train each model and evaluate
for model_name, model in models.items():
    print(f"\nTraining and Evaluating {model_name}...")
    model.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = model.predict(X_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    model_accuracies[model_name] = accuracy
    
    # Print accuracy and classification report
    print(f'Accuracy of {model_name}: {accuracy * 100:.2f}%')
    print(f"\nClassification Report for {model_name}:\n")
    print(classification_report(y_test, y_pred))

# Step 7: Save the best performing model
best_model_name = max(model_accuracies, key=model_accuracies.get)
best_model = models[best_model_name]

with open('best_fertilizer_model.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)

print(f"\nBest Model: {best_model_name} saved successfully with accuracy: {model_accuracies[best_model_name] * 100:.2f}%")



Training and Evaluating Random Forest...
Accuracy of Random Forest: 81.82%

Classification Report for Random Forest:

              precision    recall  f1-score   support

           1       0.86      1.00      0.92         6
           2       0.00      0.00      0.00         1
           3       1.00      0.50      0.67         4
           4       0.50      0.50      0.50         2
           5       0.83      1.00      0.91        15
           6       0.57      0.67      0.62         6
           7       1.00      0.80      0.89        10

    accuracy                           0.82        44
   macro avg       0.68      0.64      0.64        44
weighted avg       0.82      0.82      0.81        44


Training and Evaluating Logistic Regression...
Accuracy of Logistic Regression: 72.73%

Classification Report for Logistic Regression:

              precision    recall  f1-score   support

           1       0.71      0.83      0.77         6
           2       0.00      0.00     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.75      0.50      0.60         6
           2       0.00      0.00      0.00         1
           3       0.67      0.50      0.57         4
           4       0.25      0.50      0.33         2
           5       0.75      0.80      0.77        15
           6       0.50      0.67      0.57         6
           7       0.88      0.70      0.78        10

    accuracy                           0.66        44
   macro avg       0.47      0.46      0.45        44
weighted avg       0.70      0.66      0.67        44


Training and Evaluating Naive Bayes...
Accuracy of Naive Bayes: 68.18%

Classification Report for Naive Bayes:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.83      0.91         6
           2       0.00      0.00      0.00         1
           3       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from flask import Flask, request, jsonify, render_template
import pickle
import numpy as np
import logging
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

# Configure logging
logging.basicConfig(level=logging.INFO)

# Load the trained model, scaler, and label encoders
def load_artifacts():
    try:
        with open('fertilizer_model.pkl', 'rb') as model_file:
            model = pickle.load(model_file)
            app.logger.info("Model loaded successfully")

        with open('fertilizer_scaler.pkl', 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)
            app.logger.info("Scaler loaded successfully")

        with open('label_encoders.pkl', 'rb') as le_file:
            label_encoders = pickle.load(le_file)
            app.logger.info("Label encoders loaded successfully")
            
        return model, scaler, label_encoders
    except Exception as e:
        app.logger.error(f"Error loading artifacts: {e}")
        raise e

# Load artifacts
model, scaler, label_encoders = load_artifacts()

@app.route('/')
def home():
    return render_template('index.html')
@app.route('/predict', methods=['POST'])
def predict():
    try:
        data = request.get_json()
        app.logger.info('Received data: %s', data)

        if not data or 'features' not in data:
            raise ValueError("Invalid input data: 'features' key not found.")

        features = data['features']
        app.logger.info('Raw features: %s', features)

        if len(features) != 8:
            raise ValueError("Invalid number of features provided.")

        # Extract and encode soil_type and crop_type (categorical features)
        soil_type, crop_type = features[0], features[1]

        if soil_type not in label_encoders['Soil Type'].classes_:
            raise ValueError(f"Soil Type '{soil_type}' not recognized.")
        if crop_type not in label_encoders['Crop Type'].classes_:
            raise ValueError(f"Crop Type '{crop_type}' not recognized.")

        soil_type_encoded = label_encoders['Soil Type'].transform([soil_type])[0]
        crop_type_encoded = label_encoders['Crop Type'].transform([crop_type])[0]

        # Extract numerical features
        numerical_features = np.array(features[2:]).reshape(1, -1)

        # Scale only the numerical features (Temperature, Humidity, etc.)
        numerical_features_scaled = scaler.transform(numerical_features)

        # Combine encoded categorical and scaled numerical features
        final_features = np.concatenate([[soil_type_encoded, crop_type_encoded], numerical_features_scaled[0]])

        # Predict using the model
        prediction = model.predict([final_features])
        app.logger.info('Prediction: %s', prediction)

        fertilizer_recommendation = label_encoders['Fertilizer Name'].inverse_transform([int(prediction[0])])[0]

        return jsonify({'recommendation': fertilizer_recommendation})
    except ValueError as ve:
        error_message = f"Value Error: {str(ve)}"
        app.logger.error(error_message)
        return jsonify({'error': error_message}), 400
    except Exception as e:
        error_message = f"Error during prediction: {str(e)}"
        app.logger.error(error_message)
        return jsonify({'error': error_message}), 500




@app.route('/analytics')
def analytics():
    return render_template('analytics.html')

if __name__ == '__main__':
    app.run(debug=True, port=5002, use_reloader=False)  # Change port here



INFO:__main__:Model loaded successfully
INFO:__main__:Scaler loaded successfully
INFO:__main__:Label encoders loaded successfully


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5002
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [14/Sep/2024 12:13:15] "GET / HTTP/1.1" 200 -
INFO:__main__:Received data: {'features': ['Sandy', 'Maize', '26', '52', '38', '37', '0', '0']}
INFO:__main__:Raw features: ['Sandy', 'Maize', '26', '52', '38', '37', '0', '0']
INFO:__main__:Prediction: [7]
INFO:werkzeug:127.0.0.1 - - [14/Sep/2024 12:13:21] "POST /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [14/Sep/2024 12:14:10] "GET /analytics HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [14/Sep/2024 12:14:10] "[36mGET /static/images/temperature_distribution.png HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [14/Sep/2024 12:14:10] "[36mGET /static/images/humidity_distribution.png HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [14/Sep/2024 12:14:10] "[36mGET /static/images/nitrogen_distribution.png HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [14/Sep/2024 12:14:10] "[36mGET /static/images/correlation_heatmap_fertilize