In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [13]:
# Load the datasets
data = pd.read_csv('data.csv')  # Crop recommendation dataset
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')  # Harvest price dataset

# Display column names for reference
print("ICRISAT Dataset Columns:", icrisat_df.columns)


ICRISAT Dataset Columns: Index(['Dist Code', 'Year', 'State Code', 'State Name', 'Dist Name',
       'RICE HARVEST PRICE (Rs per Quintal)',
       'PADDY HARVEST PRICE (Rs per Quintal)',
       'WHEAT HARVEST PRICE (Rs per Quintal)',
       'SORGHUM HARVEST PRICE (Rs per Quintal)',
       'PEARL MILLET HARVEST PRICE (Rs per Quintal)',
       'MAIZE HARVEST PRICE (Rs per Quintal)',
       'FINGER MILLET HARVEST PRICE (Rs per Quintal)',
       'BARLEY HARVEST PRICE (Rs per Quintal)',
       'CHICKPEA HARVEST PRICE (Rs per Quintal)',
       'PIGEONPEA HARVEST PRICE (Rs per Quintal)',
       'GROUNDNUT HARVEST PRICE (Rs per Quintal)',
       'SEASMUM HARVEST PRICE (Rs per Quintal)',
       'RAPE AND MUSTARD HARVEST PRICE (Rs per Quintal)',
       'CASTOR HARVEST PRICE (Rs per Quintal)',
       'LINSEED HARVEST PRICE (Rs per Quintal)',
       'SUGARCANE GUR HARVEST PRICE (Rs per Quintal)',
       'COTTON KAPAS HARVEST PRICE (Rs per Quintal)'],
      dtype='object')


In [21]:
def estimate_income(crop_column, area, filtered_data):
    """
    Estimate income for a given crop and area.
    :param crop_column: The column name representing the crop's harvest price.
    :param area: The area allocated to the crop (in hectares).
    :param filtered_data: Filtered ICRISAT data based on user inputs.
    :return: Estimated income (in Rs).
    """
    if crop_column in filtered_data.columns:
        avg_price = filtered_data[crop_column].mean()  # Average price per quintal
        yield_per_hectare = 20  # Assume 20 quintals/ha (adjust based on actual data)
        return avg_price * yield_per_hectare * area
    return 0


In [23]:
def optimize_crop_combination(available_area, filtered_data, top_n=5):
    """
    Optimize crop combinations to maximize income.
    :param available_area: Total available area (in hectares).
    :param filtered_data: Filtered ICRISAT data based on user inputs.
    :param top_n: Number of top combinations to return.
    :return: List of top N combinations with maximum income.
    """
    # Filter columns that represent harvest prices
    crop_columns = [col for col in filtered_data.columns if 'HARVEST PRICE' in col]
    
    combinations = []
    
    for crop1 in crop_columns:
        for crop2 in crop_columns:
            if crop1 != crop2:
                for area1 in range(1, available_area):  # Allocate area to crop1
                    area2 = available_area - area1  # Remaining area for crop2
                    
                    # Calculate income for both crops
                    income1 = estimate_income(crop1, area1, filtered_data)
                    income2 = estimate_income(crop2, area2, filtered_data)
                    total_income = income1 + income2
                    
                    # Store the combination details
                    combinations.append({
                        'crop1': crop1,
                        'area1': area1,
                        'crop2': crop2,
                        'area2': area2,
                        'total_income': total_income
                    })
    
    # Sort combinations by total income and return the top N results
    return sorted(combinations, key=lambda x: x['total_income'], reverse=True)[:top_n]


In [24]:
# Collect user inputs
state = input("Enter state: ")
district = input("Enter district: ")
market = input("Enter market: ")
predicted_commodity = input("Enter predicted commodity: ")
land_size = float(input("Enter the size of land in hectares: "))

# Filter ICRISAT data based on user inputs (state and district)
filtered_icrisat_df = icrisat_df[
    (icrisat_df['State Name'] == state) & 
    (icrisat_df['Dist Name'] == district)
]

# Check if data is available for the given inputs
if filtered_icrisat_df.empty:
    print(f"No data found for State: {state}, District: {district}. Please try again.")
else:
    print(f"Filtered data contains {filtered_icrisat_df.shape[0]} rows.")


No data found for State: maharashtra, District: thane. Please try again.


In [25]:
if not filtered_icrisat_df.empty:
    # Get top 5 crop combinations for maximum income
    top_combinations = optimize_crop_combination(land_size, filtered_icrisat_df)

    # Display results
    print("\nTop 5 Crop Combinations for Maximum Income:")
    for i, combo in enumerate(top_combinations, 1):
        print(f"{i}. {combo['crop1']} ({combo['area1']} ha) + {combo['crop2']} ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['total_income']:,.2f}")


In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the datasets
data = pd.read_csv('data.csv')  # Crop recommendation dataset
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')  # Harvest price dataset

# Display column names for reference
print("ICRISAT Dataset Columns:", icrisat_df.columns)
def estimate_income(crop_column, area):
    """
    Estimate income for a given crop and area.
    :param crop_column: The column name representing the crop's harvest price.
    :param area: The area allocated to the crop (in hectares).
    :return: Estimated income (in Rs).
    """
    if crop_column in icrisat_df.columns:
        avg_price = icrisat_df[crop_column].mean()  # Average price per quintal
        yield_per_hectare = 20  # Assume 20 quintals/ha (adjust based on actual data)
        return avg_price * yield_per_hectare * area
    return 0
def optimize_crop_combination(available_area, top_n=5):
    """
    Optimize crop combinations to maximize income.
    :param available_area: Total available area (in hectares).
    :param top_n: Number of top combinations to return.
    :return: List of top N combinations with maximum income.
    """
    # Filter columns that represent harvest prices
    crop_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    
    combinations = []
    
    for crop1 in crop_columns:
        for crop2 in crop_columns:
            if crop1 != crop2:
                for area1 in range(1, available_area):  # Allocate area to crop1
                    area2 = available_area - area1  # Remaining area for crop2
                    
                    # Calculate income for both crops
                    income1 = estimate_income(crop1, area1)
                    income2 = estimate_income(crop2, area2)
                    total_income = income1 + income2
                    
                    # Store the combination details
                    combinations.append({
                        'crop1': crop1,
                        'area1': area1,
                        'crop2': crop2,
                        'area2': area2,
                        'total_income': total_income
                    })
    
    # Sort combinations by total income and return the top N results
    return sorted(combinations, key=lambda x: x['total_income'], reverse=True)[:top_n]
# Set available land area (in hectares)
available_area = 100

# Get top 5 crop combinations for maximum income
top_combinations = optimize_crop_combination(available_area)

# Display results


ICRISAT Dataset Columns: Index(['Dist Code', 'Year', 'State Code', 'State Name', 'Dist Name',
       'RICE HARVEST PRICE (Rs per Quintal)',
       'PADDY HARVEST PRICE (Rs per Quintal)',
       'WHEAT HARVEST PRICE (Rs per Quintal)',
       'SORGHUM HARVEST PRICE (Rs per Quintal)',
       'PEARL MILLET HARVEST PRICE (Rs per Quintal)',
       'MAIZE HARVEST PRICE (Rs per Quintal)',
       'FINGER MILLET HARVEST PRICE (Rs per Quintal)',
       'BARLEY HARVEST PRICE (Rs per Quintal)',
       'CHICKPEA HARVEST PRICE (Rs per Quintal)',
       'PIGEONPEA HARVEST PRICE (Rs per Quintal)',
       'GROUNDNUT HARVEST PRICE (Rs per Quintal)',
       'SEASMUM HARVEST PRICE (Rs per Quintal)',
       'RAPE AND MUSTARD HARVEST PRICE (Rs per Quintal)',
       'CASTOR HARVEST PRICE (Rs per Quintal)',
       'LINSEED HARVEST PRICE (Rs per Quintal)',
       'SUGARCANE GUR HARVEST PRICE (Rs per Quintal)',
       'COTTON KAPAS HARVEST PRICE (Rs per Quintal)'],
      dtype='object')


In [29]:
print("\nTop 5 Crop Combinations for Maximum Income:")
for i, combo in enumerate(top_combinations, 1):
    print(f"{i}. {combo['crop1']} ({combo['area1']} ha) + {combo['crop2']} ({combo['area2']} ha)")
    print(f"   Estimated Income: ₹{combo['total_income']:,.2f}")



Top 5 Crop Combinations for Maximum Income:
1. CHICKPEA HARVEST PRICE (Rs per Quintal) (1 ha) + SEASMUM HARVEST PRICE (Rs per Quintal) (99 ha)
   Estimated Income: ₹2,023,172.41
2. SEASMUM HARVEST PRICE (Rs per Quintal) (99 ha) + CHICKPEA HARVEST PRICE (Rs per Quintal) (1 ha)
   Estimated Income: ₹2,023,172.41
3. GROUNDNUT HARVEST PRICE (Rs per Quintal) (1 ha) + SEASMUM HARVEST PRICE (Rs per Quintal) (99 ha)
   Estimated Income: ₹2,021,896.21
4. SEASMUM HARVEST PRICE (Rs per Quintal) (99 ha) + GROUNDNUT HARVEST PRICE (Rs per Quintal) (1 ha)
   Estimated Income: ₹2,021,896.21
5. PIGEONPEA HARVEST PRICE (Rs per Quintal) (1 ha) + SEASMUM HARVEST PRICE (Rs per Quintal) (99 ha)
   Estimated Income: ₹2,021,513.18


In [8]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from scipy.optimize import minimize

# Load datasets
crop_df = pd.read_csv('data.csv')  # Crop recommendation data
yield_df = pd.read_csv('commodity_yield.csv')
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')

def preprocess_data():
    """Preprocess and integrate datasets."""
    # Create price mapping from ICRISAT data
    price_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    price_map = {}
    for col in price_columns:
        crop_name = col.split(' HARVEST')[0].strip().lower()
        price_map[crop_name] = icrisat_df[col].mean()
    return price_map

def recommend_crops(farm_conditions):
    """Recommend crops using KNN."""
    X = crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = crop_df['label']
    
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    
    probabilities = knn.predict_proba([farm_conditions])[0]
    crops = knn.classes_
    
    # Sort crops by probability in descending order
    return sorted(zip(crops, probabilities), key=lambda x: x[1], reverse=True)

def optimize_allocation(recommended_crops, total_area, price_map):
    """Optimize land allocation for maximum revenue."""
    crop_data = []
    
    for crop, prob in recommended_crops:
        yield_q = yield_df[yield_df['Commodity'].str.lower() == crop.lower()]['Yield_Quintals_Per_Hectare'].values
        price = price_map.get(crop.lower(), 0)
        if len(yield_q) > 0 and price > 0:
            crop_data.append({
                'crop': crop,
                'yield': yield_q[0],
                'price': price,
                'prob': prob
            })
    
    # If no valid crops are found, return empty lists
    if not crop_data:
        print("No valid crops found with sufficient yield and price data.")
        return [], []

    # Objective function to maximize revenue (negative for minimization)
    def objective(areas):
        return -sum(area * crop['yield'] * crop['price'] for area, crop in zip(areas, crop_data))
    
    # Constraints: total area must equal the given area
    constraints = [{'type': 'eq', 'fun': lambda x: np.sum(x) - total_area}]
    
    # Bounds: each crop's area must be between 0 and total_area
    bounds = [(0, total_area) for _ in crop_data]
    
    # Initial guess: equal distribution of area among crops
    x0 = [total_area / len(crop_data)] * len(crop_data)
    
    # Solve optimization problem
    result = minimize(objective, x0, method='SLSQP', bounds=bounds, constraints=constraints)
    
    return result.x, crop_data

def main():
    # Input farm conditions and total land area
    farm_conditions = [
        float(input(f"Enter {param}: ")) for param in 
        ['N (kg/ha)', 'P (kg/ha)', 'K (kg/ha)', 'Temperature (°C)', 
         'Humidity (%)', 'pH', 'Rainfall (mm)']
    ]
    total_area = float(input("Enter total available area (hectares): "))
    
    # Preprocess data and get price mapping
    price_map = preprocess_data()
    
    # Recommend crops based on farm conditions
    recommendations = recommend_crops(farm_conditions)
    
    print("\nTop Recommended Crops:")
    for crop, prob in recommendations[:5]:
        print(f"- {crop.capitalize()} (Probability: {prob:.2%})")
    
    # Optimize land allocation for maximum revenue
    areas, crop_data = optimize_allocation(recommendations[:5], total_area, price_map)
    
    if not areas:
        print("No optimal allocation could be determined due to insufficient data.")
        return
    
    # Display results
    print("\nOptimal Allocation:")
    total_revenue = 0
    for area, data in zip(areas, crop_data):
        if area > 0:
            revenue = area * data['yield'] * data['price']
            total_revenue += revenue
            print(f"{data['crop'].capitalize()}: {area:.1f} ha (₹{revenue:,.0f})")
    
    print(f"\nTotal Estimated Revenue: ₹{total_revenue:,.0f}")

if __name__ == "__main__":
    main()



Top Recommended Crops:
- Rice (Probability: 100.00%)
- Apple (Probability: 0.00%)
- Banana (Probability: 0.00%)
- Blackgram (Probability: 0.00%)
- Chickpea (Probability: 0.00%)

Optimal Allocation:
Rice: 500.0 ha (₹2,800,806)

Total Estimated Revenue: ₹2,800,806




In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import itertools

# Load datasets
crop_df = pd.read_csv('data.csv')
yield_df = pd.read_csv('commodity_yield.csv')
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')

def preprocess_data():
    """Extract crop price data from ICRISAT dataset."""
    price_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    crop_prices = {}
    
    for column in price_columns:
        crop_name = column.split(' HARVEST')[0].strip()
        crop_prices[crop_name] = icrisat_df[column].mean()
    
    return crop_prices, price_columns

def recommend_crops(farm_conditions):
    """Recommend crops using KNN based on farm conditions."""
    X = crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = crop_df['label']
    
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    
    probabilities = knn.predict_proba([farm_conditions])[0]
    crops = knn.classes_
    
    return sorted(zip(crops, probabilities), key=lambda x: x[1], reverse=True)

def match_crop_data(recommended_crops, yield_df, price_columns, crop_prices):
    """Match recommended crops with yield and price data."""
    crop_data = {}
    
    for crop_name, probability in recommended_crops:
        # Convert to uppercase for matching with ICRISAT columns
        crop_upper = crop_name.upper()
        
        # Find matching price column
        matching_columns = [col for col in price_columns if crop_upper in col]
        if not matching_columns:
            continue
            
        # Get yield data
        yield_values = yield_df[yield_df['Commodity'].str.lower() == crop_name.lower()]['Yield_Quintals_Per_Hectare'].values
        if len(yield_values) == 0:
            continue
            
        # Get the crop name from the price column
        icrisat_crop_name = matching_columns[0].split(' HARVEST')[0].strip()
        
        crop_data[icrisat_crop_name] = {
            'yield': yield_values[0],
            'price': crop_prices[icrisat_crop_name],
            'probability': probability
        }
    
    return crop_data

def generate_crop_combinations(crop_data, total_area=100):
    """Generate all possible combinations of crops with fixed area allocation."""
    combinations = []
    
    # Set fixed areas as per screenshot (1 ha for one crop, rest for the other)
    small_area = 1
    large_area = total_area - small_area
    
    # Generate all possible permutations of two crops
    for crop1, crop2 in itertools.permutations(crop_data.keys(), 2):
        # Calculate revenue
        revenue = (small_area * crop_data[crop1]['yield'] * crop_data[crop1]['price']) + \
                 (large_area * crop_data[crop2]['yield'] * crop_data[crop2]['price'])
        
        combinations.append({
            'crop1': crop1,
            'area1': small_area,
            'crop2': crop2,
            'area2': large_area,
            'revenue': revenue
        })
    
    # Sort by revenue in descending order
    combinations.sort(key=lambda x: x['revenue'], reverse=True)
    return combinations[:5]  # Return top 5 combinations

def main():
    # Input farm conditions and total land area
    farm_conditions = [
        float(input(f"Enter {param}: ")) for param in 
        ['N (kg/ha)', 'P (kg/ha)', 'K (kg/ha)', 'Temperature (°C)', 
         'Humidity (%)', 'pH', 'Rainfall (mm)']
    ]
    total_area = float(input("Enter total available area (hectares): "))
    
    # Get crop prices and price columns
    crop_prices, price_columns = preprocess_data()
    
    # Get crop recommendations based on farm conditions
    recommended_crops = recommend_crops(farm_conditions)
    
    # Match recommended crops with yield and price data
    crop_data = match_crop_data(recommended_crops[:10], yield_df, price_columns, crop_prices)
    
    # Generate crop combinations for maximum revenue
    combinations = generate_crop_combinations(crop_data, total_area)
    
    # Display results in the format shown in the screenshot
    print("\nTop 5 Crop Combinations for Maximum Income:")
    for i, combo in enumerate(combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")

if __name__ == "__main__":
    main()



Top 5 Crop Combinations for Maximum Income:
1. RICE HARVEST PRICE (Rs per Quintal) (1 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (99.0 ha)
   Estimated Income: ₹595,163.45
2. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (1 ha) + RICE HARVEST PRICE (Rs per Quintal) (99.0 ha)
   Estimated Income: ₹560,514.72




In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import itertools

# Load datasets
crop_df = pd.read_csv('data.csv')
yield_df = pd.read_csv('commodity_yield.csv')
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')

def preprocess_data():
    """Extract crop price data from ICRISAT dataset."""
    price_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    crop_prices = {}
    
    for column in price_columns:
        crop_name = column.split(' HARVEST')[0].strip()
        crop_prices[crop_name] = icrisat_df[column].mean()
    
    return crop_prices, price_columns

def recommend_crops(farm_conditions):
    """Recommend crops using KNN based on farm conditions."""
    X = crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = crop_df['label']
    
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    
    probabilities = knn.predict_proba([farm_conditions])[0]
    crops = knn.classes_
    
    return sorted(zip(crops, probabilities), key=lambda x: x[1], reverse=True)

def match_crop_data(recommended_crops, yield_df, price_columns, crop_prices):
    """Match recommended crops with yield and price data."""
    crop_data = {}
    
    for crop_name, probability in recommended_crops:
        # Convert to uppercase for matching with ICRISAT columns
        crop_upper = crop_name.upper()
        
        # Find matching price column
        matching_columns = [col for col in price_columns if crop_upper in col]
        if not matching_columns:
            continue
            
        # Get yield data
        yield_values = yield_df[yield_df['Commodity'].str.lower() == crop_name.lower()]['Yield_Quintals_Per_Hectare'].values
        if len(yield_values) == 0:
            continue
            
        # Get the crop name from the price column
        icrisat_crop_name = matching_columns[0].split(' HARVEST')[0].strip()
        
        crop_data[icrisat_crop_name] = {
            'yield': yield_values[0],
            'price': crop_prices[icrisat_crop_name],
            'probability': probability
        }
    
    return crop_data

def generate_crop_combinations(crop_data, total_area=100):
    """Generate combinations of crops with varying area allocations."""
    combinations = []
    
    # Get all pairs of crops
    crop_pairs = list(itertools.permutations(crop_data.keys(), 2))
    
    # Try different area allocations with step size of 10
    for area1 in range(0, int(total_area + 1), 10):
        area2 = total_area - area1
        
        for crop1, crop2 in crop_pairs:
            # Calculate revenue
            revenue = (area1 * crop_data[crop1]['yield'] * crop_data[crop1]['price']) + \
                     (area2 * crop_data[crop2]['yield'] * crop_data[crop2]['price'])
            
            combinations.append({
                'crop1': crop1,
                'area1': area1,
                'crop2': crop2,
                'area2': area2,
                'revenue': revenue
            })
    
    # Sort by revenue in descending order
    combinations.sort(key=lambda x: x['revenue'], reverse=True)
    return combinations

def main():
    # Input farm conditions and total land area
    farm_conditions = [
        float(input(f"Enter {param}: ")) for param in 
        ['N (kg/ha)', 'P (kg/ha)', 'K (kg/ha)', 'Temperature (°C)', 
         'Humidity (%)', 'pH', 'Rainfall (mm)']
    ]
    total_area = float(input("Enter total available area (hectares): "))
    
    # Get crop prices and price columns
    crop_prices, price_columns = preprocess_data()
    
    # Get crop recommendations based on farm conditions
    recommended_crops = recommend_crops(farm_conditions)
    
    # Match recommended crops with yield and price data
    crop_data = match_crop_data(recommended_crops[:10], yield_df, price_columns, crop_prices)
    
    # Generate crop combinations with varying area allocations
    combinations = generate_crop_combinations(crop_data, total_area)
    
    # Store all revenues in an array
    revenues = [combo['revenue'] for combo in combinations]
    
    # Find the maximum revenue
    max_revenue = max(revenues) if revenues else 0
    
    # Find the combination(s) that give the maximum revenue
    max_combinations = [combo for combo in combinations if combo['revenue'] == max_revenue]
    
    # Display all tested combinations
    print("\nAll Tested Crop Combinations:")
    for i, combo in enumerate(combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
    
    # Display the top 5 combinations
    print("\nTop 5 Crop Combinations for Maximum Income:")
    for i, combo in enumerate(combinations[:5], 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
    
    # Display the combination(s) with maximum revenue
    print("\nCombination(s) with Maximum Revenue:")
    for i, combo in enumerate(max_combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Maximum Estimated Income: ₹{combo['revenue']:,.2f}")

if __name__ == "__main__":
    main()



All Tested Crop Combinations:
1. RICE HARVEST PRICE (Rs per Quintal) (0 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (100.0 ha)
   Estimated Income: ₹595,517.01
2. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (100 ha) + RICE HARVEST PRICE (Rs per Quintal) (0.0 ha)
   Estimated Income: ₹595,517.01
3. RICE HARVEST PRICE (Rs per Quintal) (10 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (90.0 ha)
   Estimated Income: ₹591,981.42
4. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (90 ha) + RICE HARVEST PRICE (Rs per Quintal) (10.0 ha)
   Estimated Income: ₹591,981.42
5. RICE HARVEST PRICE (Rs per Quintal) (20 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (80.0 ha)
   Estimated Income: ₹588,445.84
6. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (80 ha) + RICE HARVEST PRICE (Rs per Quintal) (20.0 ha)
   Estimated Income: ₹588,445.84
7. RICE HARVEST PRICE (Rs per Quintal) (30 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (70.0 ha)
   Estimated Income: ₹584,910.25
8. COTTON KAPAS H



In [3]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import itertools

# Load datasets
crop_df = pd.read_csv('data.csv')
yield_df = pd.read_csv('commodity_yield.csv')
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')
price_df = pd.read_csv('pricesdataset.csv')

def preprocess_data():
    """Extract crop price data from ICRISAT dataset."""
    price_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    crop_prices = {}
    
    for column in price_columns:
        crop_name = column.split(' HARVEST')[0].strip()
        crop_prices[crop_name] = icrisat_df[column].mean()
    
    return crop_prices, price_columns

def get_suitable_crops(n, p, k, temp, humidity, ph, rainfall):
    """Find suitable crops based on soil parameters using KNN."""
    X = crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = crop_df['label']
    
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    
    farm_conditions = [n, p, k, temp, humidity, ph, rainfall]
    probabilities = knn.predict_proba([farm_conditions])[0]
    crops = knn.classes_
    
    # Return all crops with their suitability probability
    return sorted(zip(crops, probabilities), key=lambda x: x[1], reverse=True)

def match_crop_data(suitable_crops, yield_df, price_columns, crop_prices):
    """Match suitable crops with yield and price data."""
    crop_data = {}
    
    for crop_name, probability in suitable_crops:
        # Convert to uppercase for matching with ICRISAT columns
        crop_upper = crop_name.upper()
        
        # Find matching price column
        matching_columns = [col for col in price_columns if crop_upper in col]
        if not matching_columns:
            continue
            
        # Get yield data
        yield_values = yield_df[yield_df['Commodity'].str.lower() == crop_name.lower()]['Yield_Quintals_Per_Hectare'].values
        if len(yield_values) == 0:
            continue
            
        # Get the crop name from the price column
        icrisat_crop_name = matching_columns[0].split(' HARVEST')[0].strip()
        
        crop_data[icrisat_crop_name] = {
            'yield': yield_values[0],
            'price': crop_prices[icrisat_crop_name],
            'probability': probability
        }
    
    return crop_data

def generate_crop_combinations(crop_data, total_area=100, step_size=5):
    """Generate all crop combinations with variable area allocations."""
    combinations = []
    
    # Get all pairs of crops
    crop_pairs = list(itertools.permutations(crop_data.keys(), 2))
    
    # Try different area allocations with specified step size
    for area1 in range(0, int(total_area + 1), step_size):
        area2 = total_area - area1
        
        for crop1, crop2 in crop_pairs:
            # Calculate revenue
            revenue = (area1 * crop_data[crop1]['yield'] * crop_data[crop1]['price']) + \
                     (area2 * crop_data[crop2]['yield'] * crop_data[crop2]['price'])
            
            combinations.append({
                'crop1': crop1,
                'area1': area1,
                'crop2': crop2,
                'area2': area2,
                'revenue': revenue,
                'crop1_prob': crop_data[crop1]['probability'],
                'crop2_prob': crop_data[crop2]['probability']
            })
    
    # Sort by revenue in descending order
    combinations.sort(key=lambda x: x['revenue'], reverse=True)
    return combinations

def main():
    # Input farm conditions and total land area
    print("Enter soil and climate parameters:")
    n = float(input("Enter Nitrogen (N) content (kg/ha): "))
    p = float(input("Enter Phosphorus (P) content (kg/ha): "))
    k = float(input("Enter Potassium (K) content (kg/ha): "))
    temp = float(input("Enter Temperature (°C): "))
    humidity = float(input("Enter Humidity (%): "))
    ph = float(input("Enter pH: "))
    rainfall = float(input("Enter Rainfall (mm): "))
    
    total_area = float(input("Enter total available area (hectares): "))
    step_size = int(input("Enter step size for area allocation (5-10 recommended): "))
    
    # Preprocess data
    crop_prices, price_columns = preprocess_data()
    
    # Get suitable crops based on soil parameters
    suitable_crops = get_suitable_crops(n, p, k, temp, humidity, ph, rainfall)
    
    print("\nTop 10 Recommended Crops Based on Soil Parameters:")
    for i, (crop, prob) in enumerate(suitable_crops[:10], 1):
        print(f"{i}. {crop.capitalize()} (Suitability: {prob:.2%})")
    
    # Match suitable crops with yield and price data
    crop_data = match_crop_data(suitable_crops, yield_df, price_columns, crop_prices)
    
    # Generate crop combinations with varying area allocations
    combinations = generate_crop_combinations(crop_data, total_area, step_size)
    
    # Store all revenues
    revenues = [combo['revenue'] for combo in combinations]
    
    # Find the maximum revenue
    max_revenue = max(revenues) if revenues else 0
    
    # Find the combination(s) that give the maximum revenue
    max_combinations = [combo for combo in combinations if combo['revenue'] == max_revenue]
    
    # Display all tested combinations
    print("\nAll Tested Crop Combinations:")
    for i, combo in enumerate(combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
    
    # Display the top 5 combinations
    print("\nTop 5 Crop Combinations for Maximum Income:")
    for i, combo in enumerate(combinations[:5], 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
    
    # Display the combination(s) with maximum revenue
    print("\nOptimal Crop Combination(s) for Maximum Revenue:")
    for i, combo in enumerate(max_combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Maximum Estimated Income: ₹{combo['revenue']:,.2f}")
        print(f"   Soil Suitability: {combo['crop1']} ({combo['crop1_prob']:.2%}), {combo['crop2']} ({combo['crop2_prob']:.2%})")

if __name__ == "__main__":
    main()


Enter soil and climate parameters:

Top 10 Recommended Crops Based on Soil Parameters:
1. Rice (Suitability: 100.00%)
2. Apple (Suitability: 0.00%)
3. Banana (Suitability: 0.00%)
4. Blackgram (Suitability: 0.00%)
5. Chickpea (Suitability: 0.00%)
6. Coconut (Suitability: 0.00%)
7. Coffee (Suitability: 0.00%)
8. Cotton (Suitability: 0.00%)
9. Grapes (Suitability: 0.00%)
10. Jute (Suitability: 0.00%)

All Tested Crop Combinations:
1. RICE HARVEST PRICE (Rs per Quintal) (0 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (1000.0 ha)
   Estimated Income: ₹7,237,727.71
2. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (0 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (1000.0 ha)
   Estimated Income: ₹7,237,727.71
3. MAIZE HARVEST PRICE (Rs per Quintal) (994 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (6.0 ha)
   Estimated Income: ₹7,230,032.36
4. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (7 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (993.0 ha)
   Estimated Income: ₹7,228,749.80
5. MAIZE HARVEST P



In [6]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import itertools

# Load datasets
crop_df = pd.read_csv('data.csv')
yield_df = pd.read_csv('commodity_yield.csv')
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')
price_df = pd.read_csv('pricesdataset.csv')

def preprocess_data():
    """Extract crop price data from ICRISAT dataset."""
    price_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    crop_prices = {}
    
    for column in price_columns:
        crop_name = column.split(' HARVEST')[0].strip()
        crop_prices[crop_name] = icrisat_df[column].mean()
    
    return crop_prices, price_columns

def get_suitable_crops(n, p, k, temp, humidity, ph, rainfall):
    """Find suitable crops based on soil parameters using KNN."""
    X = crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = crop_df['label']
    
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    
    farm_conditions = [n, p, k, temp, humidity, ph, rainfall]
    probabilities = knn.predict_proba([farm_conditions])[0]
    crops = knn.classes_
    
    # Return all crops with their suitability probability
    return sorted(zip(crops, probabilities), key=lambda x: x[1], reverse=True)

def match_crop_data(suitable_crops, yield_df, price_columns, crop_prices):
    """Match suitable crops with yield and price data."""
    crop_data = {}
    
    for crop_name, probability in suitable_crops:
        # Convert to uppercase for matching with ICRISAT columns
        crop_upper = crop_name.upper()
        
        # Find matching price column
        matching_columns = [col for col in price_columns if crop_upper in col]
        if not matching_columns:
            continue
            
        # Get yield data
        yield_values = yield_df[yield_df['Commodity'].str.lower() == crop_name.lower()]['Yield_Quintals_Per_Hectare'].values
        if len(yield_values) == 0:
            continue
            
        # Get the crop name from the price column
        icrisat_crop_name = matching_columns[0].split(' HARVEST')[0].strip()
        
        crop_data[icrisat_crop_name] = {
            'yield': yield_values[0],
            'price': crop_prices[icrisat_crop_name],
            'probability': probability
        }
    
    return crop_data

def generate_crop_combinations(crop_data, total_area=100, step_size=10):
    """Generate all crop combinations with variable area allocations."""
    combinations = []
    
    # Get all pairs of crops
    crop_pairs = list(itertools.permutations(crop_data.keys(), 2))
    
    # Try different area allocations with specified step size
    for area1 in range(0, int(total_area + 1), step_size):
        area2 = total_area - area1
        
        for crop1, crop2 in crop_pairs:
            # Calculate revenue
            revenue = (area1 * crop_data[crop1]['yield'] * crop_data[crop1]['price']) + \
                     (area2 * crop_data[crop2]['yield'] * crop_data[crop2]['price'])
            
            combinations.append({
                'crop1': crop1,
                'area1': area1,
                'crop2': crop2,
                'area2': area2,
                'revenue': revenue,
                'crop1_prob': crop_data[crop1]['probability'],
                'crop2_prob': crop_data[crop2]['probability']
            })
    
    # Sort by revenue in descending order
    combinations.sort(key=lambda x: x['revenue'], reverse=True)
    return combinations

def refine_recommendations(combinations, soil_suitability_weight=0.3):
    """Refine recommendations by balancing revenue with soil suitability."""
    refined_combinations = []
    
    for combo in combinations:
        # Calculate soil suitability score (average of both crops' probabilities)
        soil_score = (combo['crop1_prob'] + combo['crop2_prob']) / 2
        
        # Create a combined score that balances revenue and soil suitability
        # We normalize revenue by the maximum revenue in the combinations
        max_revenue = combinations[0]['revenue']
        
        # Normalized revenue (0-1 scale)
        norm_revenue = combo['revenue'] / max_revenue
        
        # Combined score - weighted average of revenue and soil suitability
        combined_score = (1 - soil_suitability_weight) * norm_revenue + soil_suitability_weight * soil_score
        
        combo['combined_score'] = combined_score
        refined_combinations.append(combo)
    
    # Sort by combined score
    refined_combinations.sort(key=lambda x: x['combined_score'], reverse=True)
    return refined_combinations

def display_crop_recommendations(ordered_by='revenue'):
    """Main function to recommend crops based on soil and area."""
    # Input farm conditions
    print("Enter soil and climate parameters:")
    n = float(input("Enter Nitrogen (N) content (kg/ha): "))
    p = float(input("Enter Phosphorus (P) content (kg/ha): "))
    k = float(input("Enter Potassium (K) content (kg/ha): "))
    temp = float(input("Enter Temperature (°C): "))
    humidity = float(input("Enter Humidity (%): "))
    ph = float(input("Enter pH: "))
    rainfall = float(input("Enter Rainfall (mm): "))
    
    total_area = float(input("Enter total available area (hectares): "))
    step_size = int(input("Enter step size for area allocation (10 recommended): "))
    
    # Preprocess data
    crop_prices, price_columns = preprocess_data()
    
    # Get suitable crops based on soil parameters
    suitable_crops = get_suitable_crops(n, p, k, temp, humidity, ph, rainfall)
    
    print("\nRecommended Crops (In Order of Suitability):")
    for i, (crop, prob) in enumerate(suitable_crops[:10], 1):
        print(f"{i}. {crop.capitalize()}")
    
    # Match suitable crops with yield and price data
    crop_data = match_crop_data(suitable_crops, yield_df, price_columns, crop_prices)
    
    # Check if we have enough crop data
    if len(crop_data) < 2:
        print("Insufficient crop data. Please check the datasets or adjust soil parameters.")
        return
    
    # Generate crop combinations with varying area allocations
    combinations = generate_crop_combinations(crop_data, total_area, step_size)
    
    # Refine recommendations if ordered by combined score
    if ordered_by == 'combined':
        combinations = refine_recommendations(combinations)
    
    # Display all tested combinations
    print("\nAll Tested Crop Combinations:")
    for i, combo in enumerate(combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
    
    # Display the top 5 combinations
    print("\nTop 5 Crop Combinations for Maximum Income:")
    for i, combo in enumerate(combinations[:5], 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
    
    # Store all revenues
    revenues = [combo['revenue'] for combo in combinations]
    
    # Find the maximum revenue
    max_revenue = max(revenues) if revenues else 0
    
    # Find the combination(s) that give the maximum revenue
    max_combinations = [combo for combo in combinations if combo['revenue'] == max_revenue]
    
    # Display the maximum revenue combination
    print("\nOptimal Crop Combination for Maximum Revenue:")
    for i, combo in enumerate(max_combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Maximum Estimated Income: ₹{combo['revenue']:,.2f}")
        print(f"   Soil Suitability: {combo['crop1']} ({combo['crop1_prob']:.2%}), {combo['crop2']} ({combo['crop2_prob']:.2%})")

def main():
    """Main entry point with option to choose recommendation criteria."""
    print("Crop Recommendation System")
    print("==========================")
    print("1. Recommend crops based on maximum revenue")
    print("2. Recommend crops balancing revenue and soil suitability")
    
    choice = input("Enter your choice (1 or 2): ")
    
    if choice == '2':
        display_crop_recommendations('combined')
    else:
        display_crop_recommendations('revenue')

if __name__ == "__main__":
    main()


Crop Recommendation System
1. Recommend crops based on maximum revenue
2. Recommend crops balancing revenue and soil suitability
Enter soil and climate parameters:

Recommended Crops (In Order of Suitability):
1. Coconut)
2. Jute)
3. Pigeonpeas)
4. Apple)
5. Banana)
6. Blackgram)
7. Chickpea)
8. Coffee)
9. Cotton)
10. Grapes)

All Tested Crop Combinations:
1. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (0 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (200.0 ha)
   Estimated Income: ₹1,447,545.54
2. RICE HARVEST PRICE (Rs per Quintal) (0 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (200.0 ha)
   Estimated Income: ₹1,447,545.54
3. MAIZE HARVEST PRICE (Rs per Quintal) (200 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (0.0 ha)
   Estimated Income: ₹1,447,545.54
4. MAIZE HARVEST PRICE (Rs per Quintal) (200 ha) + RICE HARVEST PRICE (Rs per Quintal) (0.0 ha)
   Estimated Income: ₹1,447,545.54
5. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (4 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (196.0 h



In [7]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import itertools


crop_df = pd.read_csv('data.csv')
yield_df = pd.read_csv('commodity_yield.csv')
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')
price_df = pd.read_csv('pricesdataset.csv')

def preprocess_data():
    """Extract crop price data from ICRISAT dataset."""
    price_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    crop_prices = {}
    
    for column in price_columns:
        crop_name = column.split(' HARVEST')[0].strip()
        crop_prices[crop_name] = icrisat_df[column].mean()
    
    return crop_prices, price_columns

def get_suitable_crops(n, p, k, temp, humidity, ph, rainfall):
    """Find suitable crops based on soil parameters using KNN."""
    X = crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = crop_df['label']
    
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    
    farm_conditions = [n, p, k, temp, humidity, ph, rainfall]
    probabilities = knn.predict_proba([farm_conditions])[0]
    crops = knn.classes_
    
    # Return all crops with their suitability probability
    return sorted(zip(crops, probabilities), key=lambda x: x[1], reverse=True)

def match_crop_data(suitable_crops, yield_df, price_columns, crop_prices):
    """Match suitable crops with yield and price data."""
    crop_data = {}
    
    for crop_name, probability in suitable_crops:
        # Convert to uppercase for matching with ICRISAT columns
        crop_upper = crop_name.upper()
        
        # Find matching price column
        matching_columns = [col for col in price_columns if crop_upper in col]
        if not matching_columns:
            continue
            
        # Get yield data
        yield_values = yield_df[yield_df['Commodity'].str.lower() == crop_name.lower()]['Yield_Quintals_Per_Hectare'].values
        if len(yield_values) == 0:
            continue
            
        # Get the crop name from the price column
        icrisat_crop_name = matching_columns[0].split(' HARVEST')[0].strip()
        
        crop_data[icrisat_crop_name] = {
            'yield': yield_values[0],
            'price': crop_prices[icrisat_crop_name],
            'probability': probability
        }
    
    return crop_data

def generate_crop_combinations(crop_data, total_area=100, step_size=10):
    """Generate all crop combinations with variable area allocations."""
    combinations = []
    
    # Get all pairs of crops
    crop_pairs = list(itertools.permutations(crop_data.keys(), 2))
    
    # Try different area allocations with specified step size
    for area1 in range(0, int(total_area + 1), step_size):
        area2 = total_area - area1
        
        for crop1, crop2 in crop_pairs:
            # Calculate revenue
            revenue = (area1 * crop_data[crop1]['yield'] * crop_data[crop1]['price']) + \
                     (area2 * crop_data[crop2]['yield'] * crop_data[crop2]['price'])
            
            combinations.append({
                'crop1': crop1,
                'area1': area1,
                'crop2': crop2,
                'area2': area2,
                'revenue': revenue,
                'crop1_prob': crop_data[crop1]['probability'],
                'crop2_prob': crop_data[crop2]['probability']
            })
    
    # Sort by revenue in descending order
    combinations.sort(key=lambda x: x['revenue'], reverse=True)
    return combinations

def generate_top_suitable_crop_combinations(crop_data, top_suitable_crops, total_area=100, step_size=10):
    """Generate combinations using only the top suitable crops."""
    combinations = []
    
    # Filter to include only the top suitable crops
    suitable_crop_data = {crop: data for crop, data in crop_data.items() 
                         if crop.upper() in [c.upper() for c in top_suitable_crops]}
    
    # If we don't have enough suitable crops with data, return empty list
    if len(suitable_crop_data) < 2:
        return []
    
    # Get all pairs of these suitable crops
    crop_pairs = list(itertools.permutations(suitable_crop_data.keys(), 2))
    
    # Try different area allocations with specified step size
    for area1 in range(0, int(total_area + 1), step_size):
        area2 = total_area - area1
        
        for crop1, crop2 in crop_pairs:
            # Calculate revenue
            revenue = (area1 * suitable_crop_data[crop1]['yield'] * suitable_crop_data[crop1]['price']) + \
                     (area2 * suitable_crop_data[crop2]['yield'] * suitable_crop_data[crop2]['price'])
            
            combinations.append({
                'crop1': crop1,
                'area1': area1,
                'crop2': crop2,
                'area2': area2,
                'revenue': revenue,
                'crop1_prob': suitable_crop_data[crop1]['probability'],
                'crop2_prob': suitable_crop_data[crop2]['probability']
            })
    
    # Sort by revenue in descending order
    combinations.sort(key=lambda x: x['revenue'], reverse=True)
    return combinations

def main():
    """Main function to recommend and compare crop combinations."""
    # Input farm conditions and total land area
    print("Enter soil and climate parameters:")
    n = float(input("Enter Nitrogen (N) content (kg/ha): "))
    p = float(input("Enter Phosphorus (P) content (kg/ha): "))
    k = float(input("Enter Potassium (K) content (kg/ha): "))
    temp = float(input("Enter Temperature (°C): "))
    humidity = float(input("Enter Humidity (%): "))
    ph = float(input("Enter pH: "))
    rainfall = float(input("Enter Rainfall (mm): "))
    
    total_area = float(input("Enter total available area (hectares): "))
    step_size = int(input("Enter step size for area allocation (10 recommended): "))
    
    # Preprocess data
    crop_prices, price_columns = preprocess_data()
    
    # Get suitable crops based on soil parameters
    suitable_crops = get_suitable_crops(n, p, k, temp, humidity, ph, rainfall)
    
    print("\nRecommended Crops (In Order of Suitability):")
    for i, (crop, prob) in enumerate(suitable_crops[:10], 1):
        print(f"{i}. {crop.capitalize()}")
    
    # Get the names of the top 2 most suitable crops
    top_suitable_crop_names = [crop for crop, _ in suitable_crops[:2]]
    
    # Match all suitable crops with yield and price data
    crop_data = match_crop_data(suitable_crops, yield_df, price_columns, crop_prices)
    
    # Check if we have enough crop data
    if len(crop_data) < 2:
        print("Insufficient crop data. Please check the datasets or adjust soil parameters.")
        return
    
    # Generate all crop combinations with varying area allocations
    all_combinations = generate_crop_combinations(crop_data, total_area, step_size)
    
    # Generate combinations using only the top suitable crops
    suitable_combinations = generate_top_suitable_crop_combinations(crop_data, top_suitable_crop_names, total_area, step_size)
    
    # Display all tested combinations
    print("\nAll Tested Crop Combinations:")
    for i, combo in enumerate(all_combinations, 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
    
    # Display the top 5 combinations for maximum revenue
    print("\nTop 5 Crop Combinations for Maximum Income (Regardless of Suitability):")
    for i, combo in enumerate(all_combinations[:5], 1):
        print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
        print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
        print(f"   Soil Suitability: {combo['crop1']} ({combo['crop1_prob']:.2%}), {combo['crop2']} ({combo['crop2_prob']:.2%})")
    
    # Display the best combinations from the most suitable crops
    if suitable_combinations:
        print("\nBest Combinations Using Top 2 Most Suitable Crops:")
        for i, combo in enumerate(suitable_combinations[:3], 1):
            print(f"{i}. {combo['crop1']} HARVEST PRICE (Rs per Quintal) ({combo['area1']} ha) + {combo['crop2']} HARVEST PRICE (Rs per Quintal) ({combo['area2']} ha)")
            print(f"   Estimated Income: ₹{combo['revenue']:,.2f}")
            print(f"   Soil Suitability: {combo['crop1']} ({combo['crop1_prob']:.2%}), {combo['crop2']} ({combo['crop2_prob']:.2%})")
    else:
        print("\nNo valid combinations found using the top 2 most suitable crops.")
    
    # Compare the highest revenue combination with the best suitable combination
    print("\nComparison for Farmer's Decision:")
    print("---------------------------------")
    if all_combinations:
        max_revenue_combo = all_combinations[0]
        print(f"Highest Revenue Combination:")
        print(f"Crops: {max_revenue_combo['crop1']} ({max_revenue_combo['area1']} ha) + {max_revenue_combo['crop2']} ({max_revenue_combo['area2']} ha)")
        print(f"Estimated Income: ₹{max_revenue_combo['revenue']:,.2f}")
        print(f"Soil Suitability: {max_revenue_combo['crop1']} ({max_revenue_combo['crop1_prob']:.2%}), {max_revenue_combo['crop2']} ({max_revenue_combo['crop2_prob']:.2%})")
    
    if suitable_combinations:
        best_suitable_combo = suitable_combinations[0]
        print(f"\nBest Combination from Most Suitable Crops:")
        print(f"Crops: {best_suitable_combo['crop1']} ({best_suitable_combo['area1']} ha) + {best_suitable_combo['crop2']} ({best_suitable_combo['area2']} ha)")
        print(f"Estimated Income: ₹{best_suitable_combo['revenue']:,.2f}")
        print(f"Soil Suitability: {best_suitable_combo['crop1']} ({best_suitable_combo['crop1_prob']:.2%}), {best_suitable_combo['crop2']} ({best_suitable_combo['crop2_prob']:.2%})")
        
        # Calculate the revenue difference
        if all_combinations:
            revenue_difference = max_revenue_combo['revenue'] - best_suitable_combo['revenue']
            percentage_difference = (revenue_difference / best_suitable_combo['revenue']) * 100
            
            print(f"\nRevenue Difference:")
            print(f"The highest revenue combination generates ₹{revenue_difference:,.2f} more than the most suitable combination.")
            print(f"This is {percentage_difference:.2f}% more income.")
            
            print("\nFarmer's Consideration:")
            print("While the highest revenue combination offers more income, the most suitable crops are better adapted to your soil conditions.")
            print("This may result in more consistent yields, lower input costs for soil amendments, and more sustainable farming in the long term.")
    else:
        print("\nNo valid combinations using the most suitable crops were found.")
        print("Consider improving soil conditions or exploring other crop options that may be both suitable and profitable.")

if __name__ == "__main__":
    main()


Enter soil and climate parameters:

Recommended Crops (In Order of Suitability):
1. Pigeonpeas (Suitability: 100.00%)
2. Apple (Suitability: 0.00%)
3. Banana (Suitability: 0.00%)
4. Blackgram (Suitability: 0.00%)
5. Chickpea (Suitability: 0.00%)
6. Coconut (Suitability: 0.00%)
7. Coffee (Suitability: 0.00%)
8. Cotton (Suitability: 0.00%)
9. Grapes (Suitability: 0.00%)
10. Jute (Suitability: 0.00%)

All Tested Crop Combinations:
1. COTTON KAPAS HARVEST PRICE (Rs per Quintal) (0 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (200.0 ha)
   Estimated Income: ₹1,447,545.54
2. RICE HARVEST PRICE (Rs per Quintal) (0 ha) + MAIZE HARVEST PRICE (Rs per Quintal) (200.0 ha)
   Estimated Income: ₹1,447,545.54
3. MAIZE HARVEST PRICE (Rs per Quintal) (198 ha) + COTTON KAPAS HARVEST PRICE (Rs per Quintal) (2.0 ha)
   Estimated Income: ₹1,444,980.43
4. MAIZE HARVEST PRICE (Rs per Quintal) (198 ha) + RICE HARVEST PRICE (Rs per Quintal) (2.0 ha)
   Estimated Income: ₹1,444,273.31
5. COTTON KAPAS HARVEST PRIC



In [20]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import itertools

# Load datasets
crop_df = pd.read_csv('data.csv')
yield_df = pd.read_csv('commodity_yield.csv')
icrisat_df = pd.read_csv('ICRISAT-District Level Data.csv')
price_df = pd.read_csv('pricedataset.csv')

def preprocess_data():
    """Extract crop price data from multiple sources"""
    # Get market prices
    market_prices = price_df.groupby('Commodity')['Modal Price'].mean().to_dict()
    
    # Get ICRISAT prices
    price_columns = [col for col in icrisat_df.columns if 'HARVEST PRICE' in col]
    icrisat_prices = {}
    for col in price_columns:
        crop_name = col.split(' HARVEST')[0].strip()
        icrisat_prices[crop_name] = icrisat_df[col].mean()
    
    # Merge prices with market priority
    return {**market_prices, **icrisat_prices}, price_columns

def get_suitable_crops(farm_conditions):
    """Get crops sorted by suitability using KNN"""
    X = crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = crop_df['label']
    
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)
    
    probabilities = knn.predict_proba([farm_conditions])[0]
    return sorted(zip(knn.classes_, probabilities), key=lambda x: x[1], reverse=True)

def get_crop_data(suitable_crops, yield_df, price_data):
    """Get crop details with fallback handling"""
    crop_details = {}
    for crop, prob in suitable_crops:
        crop_upper = crop.upper()
        # Get yield with fallback
        try:
            yield_val = yield_df[yield_df['Commodity'].str.lower() == crop.lower()]['Yield_Quintals_Per_Hectare'].values[0]
        except:
            yield_val = 0  # Default to 0 if no yield data
            
        # Get price with fallback
        price = price_data.get(crop_upper, 0)
        
        crop_details[crop_upper] = {
            'yield': yield_val,
            'price': price,
            'probability': prob
        }
    return crop_details

def generate_combinations(crop_data, total_area, step_size, top_crops):
    """Generate all possible combinations with step-based allocations"""
    combinations = []
    
    # Always include top crops even with 0 values
    for crop in top_crops:
        if crop not in crop_data:
            crop_data[crop] = {'yield': 0, 'price': 0, 'probability': 0}
    
    crops = list(crop_data.keys())
    
    # Create all possible pairs including top crops
    for crop1, crop2 in itertools.permutations(crops, 2):
        # Generate area splits using user-defined step size
        for area1 in range(0, int(total_area)+1, step_size):
            area2 = total_area - area1
            if area2 < 0: continue
            
            revenue = (area1 * crop_data[crop1]['yield'] * crop_data[crop1]['price']) + \
                     (area2 * crop_data[crop2]['yield'] * crop_data[crop2]['price'])
            
            combinations.append({
                'crop1': crop1,
                'area1': area1,
                'crop2': crop2,
                'area2': area2,
                'revenue': revenue,
                'crop1_prob': crop_data[crop1]['probability'],
                'crop2_prob': crop_data[crop2]['probability']
            })
    
    # Sort by descending revenue
    return sorted(combinations, key=lambda x: x['revenue'], reverse=True)

def main():
    """Main recommendation workflow"""
    # Get farm parameters
    farm_conditions = [
        float(input(f"Enter {param}: ")) for param in 
        ['N (kg/ha)', 'P (kg/ha)', 'K (kg/ha)', 'Temperature (°C)', 
         'Humidity (%)', 'pH', 'Rainfall (mm)']
    ]
    total_area = float(input("Total land area (hectares): "))
    step_size = int(input("Allocation step size (e.g., 10 for 10% increments): "))
    
    # Preprocess data
    price_data, _ = preprocess_data()
    
    # Get suitability list
    suitable_crops = get_suitable_crops(farm_conditions)
    top_crops = [crop.upper() for crop, _ in suitable_crops[:2]]  # Top 2 suitable
    
    print("\nTop Suitable Crops:")
    for i, (crop, prob) in enumerate(suitable_crops[:5], 1):
        print(f"{i}. {crop} ({prob:.1%} suitability)")
    
    # Get crop details
    crop_data = get_crop_data(suitable_crops, yield_df, price_data)
    
    # Generate combinations
    combinations = generate_combinations(crop_data, total_area, step_size, top_crops)
    
    # Display results
    print("\nTested Combinations (Top 20):")
    for combo in combinations[:20]:
        print(f"{combo['crop1']} ({combo['area1']}ha) + {combo['crop2']} ({combo['area2']}ha)")
        print(f"  Revenue: ₹{combo['revenue']:,.2f}")
        print(f"  Suitability: {combo['crop1_prob']:.1%} + {combo['crop2_prob']:.1%}")
    
    # Show optimal combination
    optimal = combinations[0]
    print("\nOptimal Revenue Combination:")
    print(f"{optimal['crop1']} ({optimal['area1']}ha) + {optimal['crop2']} ({optimal['area2']}ha)")
    print(f"Maximum Revenue: ₹{optimal['revenue']:,.2f}")
    
    # Show best suitable combination
    suitable_combos = [c for c in combinations if c['crop1'] in top_crops or c['crop2'] in top_crops]
    if suitable_combos:
        best_suitable = max(suitable_combos, key=lambda x: x['revenue'])
        print("\nBest Suitable Crop Combination:")
        print(f"{best_suitable['crop1']} ({best_suitable['area1']}ha) + {best_suitable['crop2']} ({best_suitable['area2']}ha)")
        print(f"Revenue: ₹{best_suitable['revenue']:,.2f}")
 

if __name__ == "__main__":
    main()



Top Suitable Crops:
1. coffee (60.0% suitability)
2. coconut (20.0% suitability)
3. jute (20.0% suitability)
4. apple (0.0% suitability)
5. banana (0.0% suitability)

Tested Combinations (Top 20):
COFFEE (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 60.0% + 0.0%
COCONUT (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 20.0% + 0.0%
JUTE (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 20.0% + 0.0%
APPLE (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 0.0% + 0.0%
BANANA (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 0.0% + 0.0%
BLACKGRAM (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 0.0% + 0.0%
CHICKPEA (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 0.0% + 0.0%
COTTON (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 0.0% + 0.0%
GRAPES (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitability: 0.0% + 0.0%
KIDNEYBEANS (0ha) + MAIZE (100.0ha)
  Revenue: ₹723,772.77
  Suitab

