In [None]:
import os
import pandas as pd
import numpy as np

from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn import feature_selection
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics.pairwise import euclidean_distances
import random
from sklearn.metrics.pairwise import cosine_similarity

# Visualization
import matplotlib
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from plotly import graph_objects as go
from sklearn.neighbors import NearestNeighbors

#use streamlit for deployment

In [None]:
# Directory that holds the input CSV files. It defaults to the Colab upload
# folder used so far; set the DATA_DIR environment variable to run the notebook
# somewhere else (the recorded run below failed because the files were missing).
DATA_DIR = os.environ.get("DATA_DIR", "/content")


def _load_csv(filename):
    """Read one input CSV located in DATA_DIR and return it as a DataFrame."""
    return pd.read_csv(os.path.join(DATA_DIR, filename))


fertilizer_df = _load_csv("Fertilizer.csv")
crop_df = _load_csv("Extended_Crop_Recommendation.csv")
season_df = _load_csv("data_season.csv")
yield_df = _load_csv("crop_yield.csv")
tunisia_df = _load_csv("Tunisie_Sol.csv")
desc_df = _load_csv("tunisia_crops_descriptions_3.csv")

FileNotFoundError: [Errno 2] No such file or directory: '/content/Fertilizer.csv'

In [None]:
# Preview the first five rows of the fertilizer table.
fertilizer_df.head(5)

In [None]:
# Preview the first rows of the crop recommendation table.
crop_df.head()

In [None]:
# Preview the first rows of the crop yield table.
yield_df.head()

In [None]:
# Distinct crop names found in each source: the 'label' column of crop_df and
# the 'Crop' column of yield_df.
crop_df_crops = crop_df['label'].unique()
yield_df_crops = yield_df['Crop'].unique()

# Display both arrays together.
crop_df_crops, yield_df_crops

In [None]:
# Preview the first rows of the Tunisian soil table.
tunisia_df.head()

Traitement des données

Traitement de la table Tunisie_Sol

In [None]:
# Columns removed from the Tunisian soil table; the cleaned copy is stored in
# tunisia_df_clean and tunisia_df itself is left untouched.
cols_to_drop = ['ADM0_NAME','system:index', 'ADM0_CODE', 'ADM1_CODE', 'DISP_AREA',
                'EXP1_YEAR', 'STATUS', 'STR1_YEAR', '.geo']
tunisia_df_clean = tunisia_df.drop(columns=cols_to_drop)


In [None]:
# Keep only the rows whose 'Profondeur_cm' value is 0 or 10.
# NOTE(review): presumably these are the topsoil depth levels - confirm against the data source.
tunisia_df_clean = tunisia_df_clean[tunisia_df_clean['Profondeur_cm'].isin([0, 10])]

tunisia_df_clean.head()

In [None]:
# Rename 'ADM1_NAME' to 'gouvernerat', the column name used by the rest of the notebook.
tunisia_df_clean = tunisia_df_clean.rename(columns={'ADM1_NAME': 'gouvernerat'})
tunisia_df_clean.head()

Fusion des fichiers

In [None]:
# Attach a season to every crop_df row by joining on the normalised crop name.

# Join key: crop name in lower case without surrounding whitespace.
yield_df['Crop_clean'] = yield_df['Crop'].str.lower().str.strip()

# Crop_clean -> Season lookup; when a crop appears several times in yield_df,
# the season of its first row is the one kept.
first_season_per_crop = yield_df.drop_duplicates(subset='Crop_clean')
season_dict = first_season_per_crop.set_index('Crop_clean')['Season'].to_dict()

# Normalise the crop_df labels the same way and look them up directly, so no
# temporary column has to be created and dropped. Labels without a match get NaN.
crop_df['Season'] = crop_df['label'].str.lower().str.strip().map(season_dict)

# Show the result
crop_df

Dictionnaire des méthodes d'irrigation

In [None]:
# Hand-written lookup: lower-case crop name -> irrigation method.
# Every value is one of 'Flood', 'Sprinkler' or 'Drip'.
irrigation_methods = {
    'rice': 'Flood',
    'maize': 'Sprinkler',
    'chickpea': 'Drip',
    'kidneybeans': 'Drip',
    'pigeonpeas': 'Drip',
    'mothbeans': 'Drip',
    'mungbean': 'Drip',
    'blackgram': 'Drip',
    'lentil': 'Drip',
    'pomegranate': 'Drip',
    'banana': 'Sprinkler',
    'mango': 'Drip',
    'grapes': 'Drip',
    'watermelon': 'Sprinkler',
    'muskmelon': 'Sprinkler',
    'apple': 'Sprinkler',
    'orange': 'Drip',
    'papaya': 'Sprinkler',
    'coconut': 'Flood',
    'cotton': 'Drip',
    'jute': 'Flood',
    'coffee': 'Sprinkler',
    'wheat': 'Sprinkler',
    'barley': 'Sprinkler',
    'green gram': 'Drip',
    'peas': 'Drip',
    'horse-gram': 'Drip',
    'groundnut': 'Drip',
    'sesamum': 'Drip',
    'sunflower': 'Drip',
    'potato': 'Sprinkler',
    'sweet potato': 'Sprinkler',
    'garlic': 'Drip',
    'onion': 'Drip',
    'coriander': 'Drip',
    'tobacco': 'Sprinkler',
    'urad': 'Drip'
}

In [None]:
# Look up the irrigation method of each crop. Labels are normalised exactly like
# the season join (lower case AND stripped) so that a label with stray
# whitespace is not silently left without a method (NaN).
crop_df['Irrigation'] = crop_df['label'].str.lower().str.strip().map(irrigation_methods)

In [None]:
# Display crop_df to check the new 'Irrigation' column.
crop_df

In [None]:
# Soil-table columns and the crop-table columns they are compared with, pairwise.
soil_features = ['Nitrogen', 'Phosphorus', 'Potassium', 'pH']
crop_features = ['N', 'P', 'K', 'ph']

tunisia_values = tunisia_df_clean[soil_features].to_numpy()
crop_values = crop_df[crop_features].to_numpy()

# Euclidean distance from every crop row (rows) to every soil record (columns);
# for each crop row, keep the position of the closest soil record.
# NOTE(review): the features are compared unscaled - confirm that both tables
# use comparable units.
distances = euclidean_distances(crop_values, tunisia_values)
closest_indices = distances.argmin(axis=1)

# Store the governorate of that closest soil record on each crop row.
governorate_names = tunisia_df_clean['gouvernerat'].to_numpy()
crop_df['gouvernerat'] = governorate_names[closest_indices]

In [None]:
# Display crop_df to check the new 'gouvernerat' column.
crop_df

Nettoyage des bases de données

In [None]:
# Distinct crop labels currently present in crop_df.
crop_df_crops = crop_df['label'].unique()
crop_df_crops

In [None]:
# Hand-written list of crop names; the next cell keeps only the crop_df rows
# whose label appears in this list.
tunisia_crops = [
    'wheat', 'barley', 'maize', 'chickpea', 'lentil', 'peas', 'green gram',
    'potato', 'sweet potato', 'garlic', 'onion', 'coriander',
    'orange', 'apple', 'grapes', 'pomegranate', 'watermelon',
    'muskmelon', 'olive', 'citrus', 'mango', 'sunflower', 'groundnut',
    'tobacco',
]

In [None]:
# Normalise the crop names for the comparison (everything in lower case).
tunisia_crops_set = {c.lower() for c in tunisia_crops}

# Keep only the listed crops. The explicit .copy() makes crop_df an independent
# frame rather than a slice of the unfiltered one, so the column assignments in
# later cells do not raise SettingWithCopyWarning.
crop_df = crop_df[crop_df['label'].str.lower().isin(tunisia_crops_set)].copy()

In [None]:
# Display crop_df after restricting it to the listed crops.
crop_df

In [None]:
# Season names to rewrite; any other value is left unchanged by replace().
season_mapping = {
    "Kharif": "Summer",
    "Rabi": "Winter"
}

# Strip any spaces or hidden characters from 'Season' before replacing
crop_df['Season'] = crop_df['Season'].str.strip().replace(season_mapping)

# Show ten random rows to confirm the change. The seed is fixed so that the
# same rows are displayed on every run of the notebook.
crop_df.sample(10, random_state=42)

In [None]:
# Crops that have no season yet (Season is NaN).
null_season_df = crop_df[crop_df['Season'].isna()]
null_season_df_crops = null_season_df['label'].unique()
null_season_df_crops

In [None]:
# Hand-written fallback: lower-case crop name -> season, used by the next cell
# to fill the rows whose Season is still missing.
# NOTE: this rebinds `season_dict`, which an earlier cell built from yield_df.
season_dict = {
    'chickpea': 'Winter',         # grows mainly through the winter
    'lentil': 'Winter',           # develops during winter
    'pomegranate': 'Summer',      # matures in late summer
    'mango': 'Summer',            # ripens in summer
    'grapes': 'Summer',           # grows and ripens in summer
    'watermelon': 'Summer',       # rapid growth in early summer
    'muskmelon': 'Spring',        # grows mainly in spring
    'apple': 'Autumn',            # matures mostly in autumn
    'orange': 'Winter',           # develops and harvested in winter
    'green gram': 'Summer',       # summer short-cycle pulse
    'peas': 'Winter'              # slow development through winter
}

In [None]:
# Fill only the rows that still lack a season, using the lower-cased label as
# the key into season_dict; rows whose label is not in the dictionary stay NaN.
missing_season = crop_df['Season'].isna()
fallback_seasons = crop_df.loc[missing_season, 'label'].str.lower().map(season_dict)
crop_df.loc[missing_season, 'Season'] = fallback_seasons

In [None]:
# Re-check after the fill: crops that still have no season (Season is NaN).
null_season_df = crop_df[crop_df['Season'].isna()]
null_season_df_crops = null_season_df['label'].unique()
null_season_df_crops

In [None]:
# Display crop_df after the seasons have been filled in.
crop_df

In [None]:
# Number of rows per season, most frequent season first.
crop_df['Season'].value_counts(ascending=False)

In [None]:
# Count the rows that are exact duplicates of an earlier row.
print("Duplicate Values =",crop_df.duplicated().sum()) # number of fully duplicated rows

In [None]:
# pandas (pd) and NearestNeighbors are already imported in the first cell of the
# notebook, so they are not imported again here.

# Normalise the crop names in place of the former label -> Crop -> label rename
# round trip. assign() returns a new frame, so the column added at the end of
# this cell is written to an independent DataFrame.
crop_df = crop_df.assign(label=crop_df["label"].astype(str).str.lower().str.strip())

# Give the fertilizer nutrient columns the same short names as crop_df.
fertilizer_df = fertilizer_df.rename(columns={
    "Nitrogen": "N", "Phosphorous": "P", "Potassium": "K"
})

# Nearest-neighbour search on the N, P, K columns
fertilizer_npk = fertilizer_df[["N", "P", "K"]].values
crop_npk = crop_df[["N", "P", "K"]].values

nn = NearestNeighbors(n_neighbors=1)
nn.fit(fertilizer_npk)

# Closest fertilizer record for every crop row; `indices` has shape (n_rows, 1).
distances, indices = nn.kneighbors(crop_npk)

# Vectorised lookup of the fertilizer names (replaces one .iloc call per row).
fertilizer_names = fertilizer_df["Fertilizer Name"].to_numpy()
crop_df["Nearest Fertilizer"] = fertilizer_names[indices.ravel()]



In [None]:
# Make sure the crop-name column is called 'label' in both DataFrames
crop_df = crop_df.rename(columns={"Crop": "label"})
desc_df = desc_df.rename(columns={"Crop": "label"})

# Normalise the join key the same way on both sides
crop_df["label"] = crop_df["label"].astype(str).str.lower().str.strip()
desc_df["label"] = desc_df["label"].astype(str).str.lower().str.strip()

# One description row per crop: a repeated label in desc_df would otherwise
# duplicate the matching crop rows in the left merge (the first row is kept).
desc_unique = desc_df.drop_duplicates(subset="label")

# Drop description columns left by a previous run of this cell so that running
# it again replaces them instead of producing '_x' / '_y' suffixed copies.
stale_columns = [
    column for column in desc_unique.columns
    if column != "label" and column in crop_df.columns
]
crop_df = crop_df.drop(columns=stale_columns)

# Attach the descriptions; crops without a description get NaN
crop_df = pd.merge(crop_df, desc_unique, on="label", how="left")


In [None]:
# Save the cleaned table to crop_cleaned.csv (relative path) without the index column.
crop_df.to_csv("crop_cleaned.csv", index=False)

Encodage des colonnes

In [None]:
# Replace the text columns of crop_df by integer codes, in place.
# NOTE(review): the columns are overwritten, so this cell appears to be
# runnable only once per kernel session (a second run would apply .str to
# integer codes) - confirm before re-running.

# Seasons: fixed hand-written codes
season_order = dict(zip(['Winter', 'Spring', 'Summer', 'Autumn', 'Whole Year'], range(1, 6)))
crop_df['Season'] = crop_df['Season'].map(season_order)

# Crops: codes 1..n following the alphabetical order of the lower-cased names
unique_crops = sorted(crop_df['label'].str.lower().unique())
crop_encoding = {crop: code for code, crop in enumerate(unique_crops, start=1)}
crop_df['label'] = crop_df['label'].str.lower().map(crop_encoding)

# Governorates: codes 1..n following the sorted names, missing values ignored
unique_govs = sorted(crop_df['gouvernerat'].dropna().unique())
gov_encoding = {gov: code for code, gov in enumerate(unique_govs, start=1)}
crop_df['gouvernerat'] = crop_df['gouvernerat'].map(gov_encoding)

# Irrigation methods: codes 1..n following the sorted names, missing values ignored
unique_irrigations = sorted(crop_df['Irrigation'].dropna().unique())
irrigation_encoding = {irrig: code for code, irrig in enumerate(unique_irrigations, start=1)}
crop_df['Irrigation'] = crop_df['Irrigation'].map(irrigation_encoding)

crop_df


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,Season,Irrigation,gouvernerat,Nearest Fertilizer,Description
0,71,54,16,22.613600,63.690706,5.749914,87.759539,10,3,2,9,Twenty Eight-Twenty Eight,"Maize, also known as corn in North American En..."
1,61,44,17,26.100184,71.574769,6.931757,102.266244,10,3,2,7,Twenty Eight-Twenty Eight,"Maize, also known as corn in North American En..."
2,80,43,16,23.558821,71.593514,6.657965,66.719955,10,3,2,2,Urea,"Maize, also known as corn in North American En..."
3,73,58,21,19.972160,57.682729,6.596061,60.651715,10,3,2,2,Twenty Eight-Twenty Eight,"Maize, also known as corn in North American En..."
4,61,38,20,18.478913,62.695039,5.970458,65.438354,10,3,2,7,Twenty Eight-Twenty Eight,"Maize, also known as corn in North American En..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1093,49,36,62,22.970000,60.110000,5.790000,529.810000,20,5,2,16,Ten-Twenty Six-Twenty Six,
1094,44,30,68,23.840000,67.780000,5.820000,657.570000,20,5,2,1,Ten-Twenty Six-Twenty Six,
1095,50,44,66,29.580000,63.220000,6.350000,590.190000,20,5,2,16,Ten-Twenty Six-Twenty Six,
1096,43,38,63,26.120000,67.570000,6.420000,524.140000,20,5,2,16,Ten-Twenty Six-Twenty Six,


In [None]:
# Print the number of missing values in each column of crop_df.
print("valeur null \n",crop_df.isna().sum()) # null count per column

valeur null 
 N                       0
P                       0
K                       0
temperature             0
humidity                0
ph                      0
rainfall                0
label                   0
Season                  0
Irrigation              0
gouvernerat             0
Nearest Fertilizer      0
Description           677
dtype: int64


Modélisation

In [None]:
# Column order of the full feature vector. It must match, position by position,
# the list returned by encode_user_input (…, 'label', 'Season', 'gouvernerat',
# 'Irrigation'); otherwise an encoded user input compared against
# crop_df[features] would have its governorate and irrigation codes swapped.
features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall',
            'label', 'Season', 'gouvernerat', 'Irrigation']

# Feature matrix of the whole dataset, one row per crop_df row.
X = crop_df[features].values

In [None]:
# --- USER INPUT ENCODING ---
def encode_user_input(user_input):
    """Turn a user request into the numeric vector compared with the dataset.

    Args:
        user_input: Mapping with the keys 'N', 'P', 'K', 'temperature',
            'humidity', 'ph', 'rainfall', 'label', 'Season', 'gouvernerat'
            and 'Irrigation'.

    Returns:
        A list of 11 values in this order: the seven numeric values as given,
        then the codes of label, Season, gouvernerat and Irrigation. A
        categorical value missing from its encoding dictionary is encoded as 0.

    Raises:
        KeyError: If one of the expected keys is absent from ``user_input``.
    """
    # Normalise the crop name like the dataset labels (lower case, no
    # surrounding whitespace) so that ' Chickpea ' is still recognised.
    label_key = user_input['label'].strip().lower()
    return [
        user_input['N'],
        user_input['P'],
        user_input['K'],
        user_input['temperature'],
        user_input['humidity'],
        user_input['ph'],
        user_input['rainfall'],
        crop_encoding.get(label_key, 0),
        season_order.get(user_input['Season'], 0),
        gov_encoding.get(user_input['gouvernerat'], 0),
        irrigation_encoding.get(user_input['Irrigation'], 0)
    ]

In [None]:
# --- DECODING, TO DISPLAY READABLE RESULTS ---
# Inverse dictionaries (code -> name) built from the encoding dictionaries.
crop_decoding = dict(zip(crop_encoding.values(), crop_encoding.keys()))
season_decoding = dict(zip(season_order.values(), season_order.keys()))
gov_decoding = dict(zip(gov_encoding.values(), gov_encoding.keys()))
irrigation_decoding = dict(zip(irrigation_encoding.values(), irrigation_encoding.keys()))

def decode_results(df):
    """Return a copy of ``df`` whose categorical codes are replaced by names.

    The 'label', 'Season', 'gouvernerat' and 'Irrigation' columns are mapped
    through their decoding dictionaries; a code missing from a dictionary
    becomes NaN. The DataFrame passed in is not modified.
    """
    lookups = {
        'label': crop_decoding,
        'Season': season_decoding,
        'gouvernerat': gov_decoding,
        'Irrigation': irrigation_decoding,
    }
    decoded = df.copy()
    for column, lookup in lookups.items():
        decoded[column] = decoded[column].map(lookup)
    return decoded



In [None]:
# --- RECOMMENDATION SYSTEM ---
def recommend_crops(input_vector, top_n=5):
    """Return the ``top_n`` dataset rows most similar to ``input_vector``.

    ``input_vector`` must follow the column order listed below, which is the
    order produced by ``encode_user_input``. Rows are ranked by decreasing
    cosine similarity and returned with their categorical codes decoded.
    """
    feature_columns = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall',
                       'label', 'Season', 'gouvernerat', 'Irrigation']
    dataset_matrix = crop_df[feature_columns].values
    scores = cosine_similarity([input_vector], dataset_matrix)[0]
    best_first = np.argsort(scores)[::-1]
    return decode_results(crop_df.iloc[best_first[:top_n]])

In [None]:
user_input = {
    'N': 70,
    'P': 30,
    'K': 45,
    'temperature': 24,
    'humidity': 70,
    'ph': 6.4,
    'rainfall': 180,
    'label': 'chickpea',
    'Season': 'Spring',
    'gouvernerat': 'Sfax',
    'Irrigation': 'Drip'
}

input_vector = encode_user_input(user_input)
recommend_crops(input_vector, top_n=5)


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,Season,Irrigation,gouvernerat,Nearest Fertilizer,Description
1007,64,33,48,16.93,51.92,6.53,166.09,barley,Whole Year,Sprinkler,Ariana,Ten-Twenty Six-Twenty Six,
1011,78,34,48,18.27,53.82,6.31,150.89,barley,Whole Year,Sprinkler,Bizerte,Urea,
1008,73,39,32,16.62,45.51,6.32,152.17,barley,Whole Year,Sprinkler,Bizerte,Urea,
424,38,15,30,28.91862,48.139745,5.075505,97.013316,mango,Summer,Drip,Sidi Bouz,Ten-Twenty Six-Twenty Six,
421,36,25,33,27.983928,53.330189,5.548585,99.614657,mango,Summer,Drip,Mahdia,Ten-Twenty Six-Twenty Six,


Système de recommandation cas par cas

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def get_similarity_recommendations(input_vector, features, top_n=5):
    """Return the ``top_n`` rows closest to ``input_vector`` on ``features``.

    ``features`` lists the crop_df columns to compare, in the same order as the
    values of ``input_vector``. Rows are ranked by decreasing cosine similarity
    and returned with their categorical codes decoded.
    """
    scores = cosine_similarity([input_vector], crop_df[features].values)[0]
    ranked = scores.argsort()[::-1]
    closest_rows = crop_df.iloc[ranked[:top_n]]
    return decode_results(closest_rows)

Recommandation par crop

In [None]:
def recommend_by_crop_balanced(crop_name, top_n=5):
    """Recommend rows similar to a given crop, at most one row per crop.

    The first dataset row of the requested crop is used as the reference
    profile. Every row is ranked by cosine similarity to that profile and the
    ranking is walked from the most similar row down, keeping the first row
    met for each distinct crop.

    Args:
        crop_name: Crop name, matched case-insensitively against ``crop_encoding``.
        top_n: Maximum number of rows (one per distinct crop) to return.

    Returns:
        A decoded DataFrame with a fresh 0..k-1 index, most similar row first,
        or an error string when the crop is not in the dataset.
    """
    crop_code = crop_encoding.get(crop_name.lower())
    crop_rows = crop_df[crop_df['label'] == crop_code]
    if crop_rows.empty:
        return f"❌ Crop '{crop_name}' not found."

    # Columns compared between the reference row and the rest of the dataset
    features = ['N', 'P', 'K', 'ph', 'temperature', 'humidity', 'rainfall', 'Season', 'Irrigation']

    # Select the feature columns before taking the row so the vector is numeric
    input_vector = crop_rows[features].iloc[0].values
    X = crop_df[features].values
    similarities = cosine_similarity([input_vector], X)
    ranked_positions = np.argsort(similarities[0])[::-1]

    # Keep the best-ranked row of each crop. Comparing the integer codes avoids
    # a reverse dictionary scan per row, and testing the limit before adding a
    # row guarantees that no more than top_n rows are returned.
    label_codes = crop_df['label'].values
    seen_codes = set()
    picked = []
    for pos in ranked_positions:
        if len(picked) >= top_n:
            break
        code = label_codes[pos]
        if code in seen_codes:
            continue
        seen_codes.add(code)
        picked.append(pos)

    # Shared decoding helper instead of a copy of its four map() calls
    return decode_results(crop_df.iloc[picked]).reset_index(drop=True)

Recommandation par saison

In [None]:
def recommend_by_season_balanced(season, top_n=5):
    """Recommend crops for a season, at most one row per crop.

    The first dataset row of the requested season always comes first. The rest
    is filled with the rows most similar (cosine similarity on the seven
    numeric features) to the average profile of that season, whatever their
    own season, skipping crops that were already selected.

    Args:
        season: Season name, a key of ``season_order`` (e.g. 'Spring').
        top_n: Maximum number of rows (one per distinct crop) to return.

    Returns:
        A decoded DataFrame with a fresh 0..k-1 index, or an error string when
        the season is unknown or has no row in the dataset.
    """
    season_code = season_order.get(season, 0)
    if season_code == 0:
        return f"❌ Season '{season}' not recognized."

    # Guard first: nothing below makes sense without a row for that season
    in_season = (crop_df['Season'] == season_code).values
    if not in_season.any():
        return "❌ No crop registered for this season."

    features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

    # Average profile of the season, then similarity of every row to it
    mean_vector = crop_df.loc[in_season, features].mean().values
    similarities = cosine_similarity([mean_vector], crop_df[features].values)
    ranked_positions = np.argsort(similarities[0])[::-1]

    # Candidates: the first row of the season, then all rows by similarity.
    # The limit is tested before a row is added, so top_n is never exceeded
    # (it used to be tested after the append and could be overshot by one).
    first_in_season = int(np.flatnonzero(in_season)[0])
    label_codes = crop_df['label'].values
    seen_codes = set()
    picked = []
    for pos in [first_in_season, *ranked_positions]:
        if len(picked) >= top_n:
            break
        code = label_codes[pos]
        if code in seen_codes:
            continue
        seen_codes.add(code)
        picked.append(pos)

    # Shared decoding helper instead of a copy of its four map() calls
    return decode_results(crop_df.iloc[picked]).reset_index(drop=True)


Recommandation par irrigation


In [None]:
def recommend_by_irrigation_balanced(irrigation_type, top_n=5):
    """Recommend crops that use a given irrigation method, one row per crop.

    Rows are ranked by cosine similarity (seven numeric features) to the
    average profile of the rows using that method; only rows that use the
    method are kept, and each crop appears at most once.

    Args:
        irrigation_type: Method name, a key of ``irrigation_encoding`` (e.g. 'Drip').
        top_n: Maximum number of rows (one per distinct crop) to return.

    Returns:
        A decoded DataFrame with a fresh 0..k-1 index, or an error string when
        the method is unknown or no row uses it.
    """
    irrigation_code = irrigation_encoding.get(irrigation_type, 0)
    if irrigation_code == 0:
        return f"❌ Irrigation type '{irrigation_type}' not found."

    # Rows that use the requested method
    uses_method = (crop_df['Irrigation'] == irrigation_code).values
    if not uses_method.any():
        return f"❌ No crops found for irrigation '{irrigation_type}'."

    features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

    # Average profile of those rows, then similarity of every row to it
    mean_vector = crop_df.loc[uses_method, features].mean().values
    similarities = cosine_similarity([mean_vector], crop_df[features].values)
    ranked_positions = np.argsort(similarities[0])[::-1]

    # Best-ranked row of each crop among the rows using the method. The limit
    # is tested before a row is added so that top_n is never exceeded.
    label_codes = crop_df['label'].values
    seen_codes = set()
    picked = []
    for pos in ranked_positions:
        if len(picked) >= top_n:
            break
        if not uses_method[pos]:
            continue
        code = label_codes[pos]
        if code in seen_codes:
            continue
        seen_codes.add(code)
        picked.append(pos)

    # Shared decoding helper instead of a copy of its four map() calls
    return decode_results(crop_df.iloc[picked]).reset_index(drop=True)


Recommandation intelligente mixte (cas par cas)

In [None]:
def recommend_case_by_case_balanced(user_input, top_n=5):
    """Recommend crops from any combination of crop, season and irrigation.

    Each recognised criterion contributes one profile over the seven numeric
    features: the first row of the crop ('label'), the average of the rows of
    the season ('Season'), the average of the rows using the method
    ('Irrigation'). The profiles are averaged and every row is ranked by cosine
    similarity to that average; each crop appears at most once in the result.
    Other keys of ``user_input`` are ignored, and so is a criterion whose value
    matches no row.

    Args:
        user_input: Mapping that may contain 'label', 'Season' and/or 'Irrigation'.
        top_n: Maximum number of rows (one per distinct crop) to return.

    Returns:
        A decoded DataFrame with a fresh 0..k-1 index, or an error string when
        no criterion produced a profile.
    """
    features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
    vectors = []

    if 'label' in user_input:
        label_code = crop_encoding.get(user_input['label'].lower(), None)
        crop_rows = crop_df[crop_df['label'] == label_code]
        if not crop_rows.empty:
            # Select the feature columns before taking the row so the vector is numeric
            vectors.append(crop_rows[features].iloc[0].values)

    # Season and irrigation are handled alike: average profile of the matching
    # rows. The user_input key and the crop_df column share the same name.
    for key, encoding in (('Season', season_order), ('Irrigation', irrigation_encoding)):
        if key in user_input:
            code = encoding.get(user_input[key], 0)
            matching_rows = crop_df[crop_df[key] == code]
            if not matching_rows.empty:
                vectors.append(matching_rows[features].mean().values)

    if not vectors:
        return "❌ No valid input provided."

    # Average of the combined profiles
    final_vector = np.mean(vectors, axis=0)

    # Similarity of every row to the combined profile
    X = crop_df[features].values
    similarities = cosine_similarity([final_vector], X)
    ranked_positions = np.argsort(similarities[0])[::-1]

    # Best-ranked row of each crop. The limit is tested before a row is added
    # so that top_n is never exceeded.
    label_codes = crop_df['label'].values
    seen_codes = set()
    picked = []
    for pos in ranked_positions:
        if len(picked) >= top_n:
            break
        code = label_codes[pos]
        if code in seen_codes:
            continue
        seen_codes.add(code)
        picked.append(pos)

    # Shared decoding helper instead of a copy of its four map() calls
    return decode_results(crop_df.iloc[picked]).reset_index(drop=True)


In [None]:
# Example: up to five rows, one per crop, ranked by similarity to chickpea.
recommend_by_crop_balanced('chickpea', top_n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,Season,Irrigation,gouvernerat,Nearest Fertilizer,Description
0,40,72,77,17.024985,16.988612,7.485996,88.551231,chickpea,Winter,Drip,Tunis,Ten-Twenty Six-Twenty Six,The chickpea or chick pea is an annual legume ...
1,35,142,203,21.170892,90.237302,5.895319,123.649515,apple,Autumn,Sprinkler,Tunis,Ten-Twenty Six-Twenty Six,"An apple is a round, edible fruit produced by ..."
2,27,47,35,18.39,44.92,7.26,63.54,lentil,Winter,Drip,Sidi Bouz,Fourteen-Thirty Five-Fourteen,The lentil is a legume; it is an annual plant ...
3,62,45,34,15.44,45.17,6.02,95.13,barley,Whole Year,Sprinkler,Ariana,Twenty Eight-Twenty Eight,
4,39,132,196,35.830891,83.325601,5.778594,73.679849,grapes,Summer,Drip,Tunis,Ten-Twenty Six-Twenty Six,


In [None]:
# Example: up to five rows, one per crop, for the 'Spring' season profile.
recommend_by_season_balanced('Spring', top_n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,Season,Irrigation,gouvernerat,Nearest Fertilizer,Description
0,115,17,55,27.578269,94.118782,6.776533,28.082532,muskmelon,Spring,Sprinkler,Jendouba,Urea,
1,95,16,55,25.269312,87.550551,6.612848,40.126504,watermelon,Summer,Sprinkler,Jendouba,Urea,
2,99,38,21,22.883309,71.597224,6.352472,67.727773,maize,Summer,Sprinkler,Jendouba,Urea,"Maize, also known as corn in North American En..."
3,110,57,42,15.1,57.31,6.37,106.6,wheat,Winter,Sprinkler,Jendouba,Urea,
4,40,61,22,20.949818,65.810876,7.002216,44.23913,lentil,Winter,Drip,Kairouan,DAP,The lentil is a legume; it is an annual plant ...


In [None]:
# Example: up to five rows, one per crop, among the rows irrigated with 'Drip'.
recommend_by_irrigation_balanced('Drip', top_n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,Season,Irrigation,gouvernerat,Nearest Fertilizer,Description
0,23,45,36,24.36,48.34,6.3,94.97,lentil,Winter,Drip,Sfax,Fourteen-Thirty Five-Fourteen,The lentil is a legume; it is an annual plant ...
1,22,54,38,21.12,46.77,8.85,98.39,chickpea,Winter,Drip,Sfax,Fourteen-Thirty Five-Fourteen,The chickpea or chick pea is an annual legume ...
2,24,33,35,29.263829,54.822579,5.342866,100.758623,mango,Summer,Drip,Sfax,Ten-Twenty Six-Twenty Six,
3,23,30,44,20.938929,85.429129,6.124761,103.029594,pomegranate,Summer,Drip,Sfax,Ten-Twenty Six-Twenty Six,
4,22,30,12,15.781442,92.510777,6.354007,119.035002,orange,Winter,Drip,Gabes,Twenty Eight-Twenty Eight,


In [None]:
# Example: combine a crop criterion and a season criterion (default top_n=5).
recommend_case_by_case_balanced({'label': 'chickpea', 'Season': 'Autumn'})


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,Season,Irrigation,gouvernerat,Nearest Fertilizer,Description
0,35,142,203,21.170892,90.237302,5.895319,123.649515,apple,Autumn,Sprinkler,Tunis,Ten-Twenty Six-Twenty Six,"An apple is a round, edible fruit produced by ..."
1,28,72,84,18.729631,19.181973,6.481783,71.580102,chickpea,Winter,Drip,Tunis,Ten-Twenty Six-Twenty Six,The chickpea or chick pea is an annual legume ...
2,39,132,196,35.830891,83.325601,5.778594,73.679849,grapes,Summer,Drip,Tunis,Ten-Twenty Six-Twenty Six,
3,27,47,35,18.39,44.92,7.26,63.54,lentil,Winter,Drip,Sidi Bouz,Fourteen-Thirty Five-Fourteen,The lentil is a legume; it is an annual plant ...
4,8,38,32,29.751508,46.737233,4.981817,91.405983,mango,Summer,Drip,Sfax,Ten-Twenty Six-Twenty Six,


Enrichissement de la base de données

In [None]:
# import requests
# import pandas as pd

# # Titres exacts pour chaque culture
# correct_titles = {
#     'wheat': 'Wheat',
#     'barley': 'Barley',
#     'peas': 'Pisum_sativum',
#     'garlic': 'Garlic',
#     'onion': 'Onion',
#     'orange': 'Orange_(fruit)',
#     'grapes': 'Grape',
#     'muskmelon': 'Cucumis_melo'
# }

# wiki_api_base = "https://en.wikipedia.org/api/rest_v1/page/summary/"

# results = []

# for crop, title in correct_titles.items():
#     url = wiki_api_base + title
#     try:
#         response = requests.get(url)
#         if response.status_code == 200:
#             data = response.json()
#             desc = data.get("extract", "No description found")
#             results.append({"Crop": crop, "WikiTitle": title.replace("_", " "), "Description": desc})
#             print(f"✅ {crop}")
#         else:
#             results.append({"Crop": crop, "WikiTitle": title, "Description": f"HTTP error {response.status_code}"})
#     except Exception as e:
#         results.append({"Crop": crop, "WikiTitle": title, "Description": str(e)})

# df = pd.DataFrame(results)
# df.to_csv("fixed_wikipedia_crop_descriptions.csv", index=False)
# print("✅ Fichier généré : fixed_wikipedia_crop_descriptions.csv")
# print(df.head())


In [None]:
# crops_description= pd.read_csv("/content/tunisia_crops_descriptions_1.csv")
# crops_description.tail()

In [None]:
# # Charger le fichier CSV
# df = pd.read_csv("/content/tunisia_crops_descriptions_1.csv")

# # Liste des cultures à supprimer
# crops_to_remove = ['wheat', 'barley', 'peas', 'garlic', 'onion', 'orange', 'grapes', 'muskmelon']

# # Supprimer les lignes où la colonne 'Crop' contient ces cultures
# df_filtered = df[~df['Crop'].isin(crops_to_remove)]

# # Sauvegarder dans un nouveau fichier (ou écraser l'ancien si tu veux)
# df_filtered.to_csv("/content/tunisia_crops_descriptions_1.csv", index=False)


In [None]:
# # Charger le fichier d'origine et celui avec les nouvelles descriptions
# original_path = "/content/tunisia_crops_descriptions_1.csv"
# new_data_path = "/content/fixed_wikipedia_crop_descriptions.csv"

# df_original = pd.read_csv(original_path)
# df_new = pd.read_csv(new_data_path)

# # Supprimer la colonne WikiTitle du nouveau fichier
# if 'WikiTitle' in df_new.columns:
#      df_new = df_new.drop(columns=['WikiTitle'])

# # Supprimer les lignes spécifiques de l'ancien fichier
# crops_to_remove = ['wheat', 'barley', 'peas', 'garlic', 'onion', 'orange', 'grapes', 'muskmelon']
# df_original_filtered = df_original[~df_original['Crop'].isin(crops_to_remove)]

# # Concaténer les deux
# df_final = pd.concat([df_original_filtered, df_new], ignore_index=True)

# # Sauvegarder dans le même fichier
# output_path = "/content/tunisia_crops_descriptions_1.csv"
# df_final.to_csv(output_path, index=False)

In [None]:
# import requests

# # Cultures mal décrites et leurs titres corrigés pour Wikipedia
# corrections = {
#      'maize': 'Maize',
#      'chickpea': 'Chickpea',
#      'lentil': 'Lentil',
#      'coriander': 'Coriander',
#      'apple': 'Apple'
#  }

# # Utilisation de l'API REST de Wikipedia
# wiki_api_base = "https://en.wikipedia.org/api/rest_v1/page/summary/"

# results = []

# for crop, title in corrections.items():
#     url = wiki_api_base + title.replace(" ", "_")
#     try:
#         response = requests.get(url)
#         if response.status_code == 200:
#             data = response.json()
#             desc = data.get("extract", "No description found")
#             results.append({"Crop": crop, "Description": desc})
#         else:
#             results.append({"Crop": crop, "Description": f"HTTP error {response.status_code}"})
#     except Exception as e:
#         results.append({"Crop": crop, "Description": str(e)})

# # Résultats dans un DataFrame
# df_fixed = pd.DataFrame(results)
# df_fixed.head()
# output_path = "/content/tunisia_crops_descriptions_1.csv"
# df_fixed.to_csv(output_path, index=False)


Modélisation LLM

Déploiement