In [16]:
def preprocess_data():
    # Load datasets
    merged_data = pd.read_csv('final_dataset.csv')
    crop_production = pd.read_csv('crop_production.csv')
    district_population = pd.read_csv('district_level_population.csv')
    food = pd.read_csv('food.csv')
    nutrition = pd.read_csv('nutrition.csv')
    rainfall = pd.read_csv('rainfall.csv')
    yield_complete = pd.read_csv('yield_complete.csv')
    necessary_nutrition = pd.read_csv('necessary_nutrition.csv')
    prices_data = pd.read_csv('prices_districtwise.csv')
    
    # Standardize column names
    prices_data.columns = prices_data.columns.str.lower()
    prices_data = prices_data.map(lambda x: x.lower() if isinstance(x, str) else x)

    # Convert date column to datetime
    prices_data['arrival_date'] = pd.to_datetime(prices_data['arrival_date'], format='%d/%m/%Y')

    # Merge datasets
    merged_data = pd.merge(merged_data, prices_data, left_on=['district_name'], right_on=['district'])
    merged_data.ffill(inplace=True)

    # Create new feature: average_price
    merged_data['average_price'] = merged_data[['min_price', 'max_price']].mean(axis=1)
    
    return merged_data, necessary_nutrition, food

# Run the preprocessing once and bind the resulting tables as module-level
# names. NOTE(review): comprehensive_model() calls preprocess_data() again on
# its own, so it does not reuse these globals.
merged_data, necessary_nutrition, food = preprocess_data()


In [17]:
def recommend_food_exchanges(surplus_deficit_status, dietary_needs):
    """Pair each surplus district with another district that needs the nutrient.

    Args:
        surplus_deficit_status: ``{district: {nutrient: balance}}``. A balance
            greater than zero is treated as a surplus.
        dietary_needs: ``{district: {nutrient: {group: need}}}``.

    Returns:
        dict: ``{nutrient: [(surplus_district, target_district), ...]}``.
        For each surplus district and nutrient, at most one target is
        recorded: the first other district (in ``dietary_needs`` order) whose
        summed need exceeds its own balance for that nutrient.
    """
    food_exchange_recommendations = {}

    for district, nutrient_status in surplus_deficit_status.items():
        for nutrient, surplus in nutrient_status.items():
            if not surplus > 0:  # Only surplus districts can supply
                continue

            for target_district, target_needs in dietary_needs.items():
                # A district cannot exchange food with itself.
                if target_district == district:
                    continue

                total_need = sum(target_needs.get(nutrient, {}).values())
                target_balance = surplus_deficit_status.get(target_district, {}).get(nutrient, 0)
                if total_need > target_balance:  # Target district needs more
                    food_exchange_recommendations.setdefault(nutrient, []).append(
                        (district, target_district)
                    )
                    break  # One target per surplus district and nutrient

    return food_exchange_recommendations


In [18]:
def calculate_dietary_needs(state, district, merged_data, necessary_nutrition):
    """Compute per-nutrient, per-group requirements for one district.

    The district is located by case-insensitive match on ``state_name`` and
    ``district_name``; only the first matching row is used. Each group's
    population is the sum of its two "(1000 number)" columns multiplied by
    1000, and its requirement is that population times the nutrient's
    ``daily_requirement``.

    Returns:
        dict: ``{nutrient: {group: requirement}}``, or the message string
        ``"No data available for the given state and district."`` when no row
        matches.
    """
    population_columns = {
        'children_male': ['young male crossbred (1000 number)', 'young male indigenous (1000 number)'],
        'children_female': ['young female crossbred (1000 number)', 'young female indigenous (1000 number)'],
        'adults_male': ['adult male crossbred (1000 number)', 'adult male indigenous (1000 number)'],
        'adults_female': ['adult female crossbred (1000 number)', 'adult female indigenous (1000 number)']
    }

    state_matches = merged_data['state_name'].str.lower() == state.lower()
    district_matches = merged_data['district_name'].str.lower() == district.lower()
    selection = merged_data[state_matches & district_matches]
    if selection.empty:
        return "No data available for the given state and district."

    per_capita_by_nutrient = necessary_nutrition.set_index('nutrient')['daily_requirement'].to_dict()

    # Source columns are in thousands, hence the factor of 1000.
    return {
        nutrient: {
            group: sum(selection[col].values[0] for col in cols) * 1000 * per_capita
            for group, cols in population_columns.items()
        }
        for nutrient, per_capita in per_capita_by_nutrient.items()
    }


In [19]:
def get_nutrition_info(required_nutrition, necessary_nutrition, food):
    """Collect requirement, description and matching foods for known nutrients.

    Both the requirement and the description are looked up case-insensitively,
    so a reference table spelling a nutrient ``"Protein"`` still matches the
    lower-case names used here. Nutrients that are absent from either
    ``required_nutrition`` or ``necessary_nutrition`` are skipped instead of
    raising.

    Args:
        required_nutrition: mapping of nutrient name to daily requirement.
        necessary_nutrition: DataFrame with ``nutrient`` and ``description``
            columns.
        food: DataFrame with ``category`` and ``description`` columns.

    Returns:
        dict: ``{nutrient: {'daily_requirement', 'description', 'food_items'}}``
        where ``food_items`` lists the descriptions of foods whose category
        contains the nutrient's category keyword (case-insensitive), or
        ``["No specific food items found"]`` when nothing matches.
    """
    nutrient_to_category = {
        'protein': 'protein',
        'carbohydrates': 'carbohydrate',
        'total fat': 'fat',
        'saturated fat': 'saturated fat',
        'fiber': 'fiber',
        'sugar': 'sugar',
        'vitamin a': 'vitamin a',
        'vitamin c': 'vitamin c',
        'calcium': 'calcium',
        'iron': 'iron',
        'potassium': 'potassium',
        'magnesium': 'magnesium',
        'zinc': 'zinc',
        'vitamin d': 'vitamin d',
        'vitamin b12': 'vitamin b12'
    }

    # Lower-cased views, built once, so both lookups agree on case handling.
    requirement_by_name = {
        str(name).lower(): amount for name, amount in required_nutrition.items()
    }
    reference_names = necessary_nutrition['nutrient'].str.lower()

    nutrition_info = {}
    for nutrient, category in nutrient_to_category.items():
        key = nutrient.lower()
        descriptions = necessary_nutrition[reference_names == key]['description']
        if descriptions.empty or key not in requirement_by_name:
            # No reference data for this nutrient; nothing meaningful to report.
            continue

        food_items = food[food['category'].str.contains(category, case=False, na=False)]
        nutrition_info[nutrient] = {
            # An exact-case key wins over the case-insensitive fallback.
            'daily_requirement': required_nutrition.get(nutrient, requirement_by_name[key]),
            'description': descriptions.values[0],
            'food_items': (
                food_items['description'].tolist()
                if not food_items.empty
                else ["No specific food items found"]
            ),
        }
    return nutrition_info


In [20]:
def identify_surplus_deficit(state, merged_data, necessary_nutrition):
    """Compute, per district of a state, production minus nutrient need.

    Rows are selected by case-insensitive match on ``state_name``. For each
    district the summed ``production`` (multiplied by 1000) is compared with
    the total requirement of every nutrient returned by
    ``calculate_dietary_needs``.

    Returns:
        dict: ``{district: {nutrient: production - total_need}}``. An empty
        dict is returned (after printing a message) when the state has no
        rows or no production totals.
    """
    rows_for_state = merged_data[merged_data['state_name'].str.lower() == state.lower()]
    if rows_for_state.empty:
        print(f"No data available for the state: {state}")
        return {}

    production_by_district = rows_for_state.groupby('district_name')['production'].sum()
    if production_by_district.empty:
        print(f"No production data available for the state: {state}")
        return {}

    # Requirements are gathered for every district before any balance is computed.
    needs_by_district = {
        name: calculate_dietary_needs(state, name, merged_data, necessary_nutrition)
        for name in rows_for_state['district_name'].unique()
    }

    balance_by_district = {}
    for name, needs in needs_by_district.items():
        if name not in production_by_district:
            print(f"No production data available for the district: {name}")
            continue

        produced = production_by_district[name] * 1000  # Convert to actual number
        balance_by_district[name] = {
            nutrient: produced - sum(group_needs.values())
            for nutrient, group_needs in needs.items()
        }

    return balance_by_district


In [21]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

def train_crop_production_model(data, features, target):
    """Fit a random-forest regressor and report its hold-out error.

    The rows of ``data`` are split 80/20 with a fixed ``random_state`` of 42;
    the model is fitted on the larger part and the mean absolute error on the
    smaller part is printed.

    Args:
        data: DataFrame holding both the feature and the target columns.
        features: list of feature column names.
        target: name of the target column.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    predictors, response = data[features], data[target]

    train_X, holdout_X, train_y, holdout_y = train_test_split(
        predictors, response, test_size=0.2, random_state=42
    )

    forest = RandomForestRegressor()
    forest.fit(train_X, train_y)

    mae = mean_absolute_error(holdout_y, forest.predict(holdout_X))
    print(f'Mean Absolute Error: {mae}')

    return forest


In [22]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def train_nutrient_deficiency_model(data, features, target):
    """Fit a standardised logistic-regression classifier and report accuracy.

    The data is split 80/20 (``random_state=42``) *before* scaling, and the
    ``StandardScaler`` is fitted on the training fold only. Fitting the scaler
    on the full dataset would leak the test rows' mean and variance into
    training and bias the printed accuracy.

    Args:
        data: DataFrame holding both the feature and the target columns.
        features: list of feature column names.
        target: name of the (binary) target column.

    Returns:
        tuple: ``(model, scaler)`` - the fitted ``LogisticRegression`` and the
        ``StandardScaler`` that must be applied to any new data before
        calling ``model.predict``.
    """
    X = data[features]
    y = data[target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Learn scaling parameters from the training fold only, then reuse them.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = LogisticRegression()
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy}')

    return model, scaler


In [23]:
def define_nutrient_deficiency(data, necessary_nutrition, threshold=5000):
    """Add a binary ``nutrient_deficiency`` column to ``data``.

    For every row, the requirement of each nutrient is the sum over the four
    age/gender groups of ``group population * daily_requirement`` (population
    columns are in thousands, hence the factor of 1000). The shortfall of a
    nutrient is ``max(0, requirement - production * 1000)``; a row is flagged
    1 when the shortfalls summed over all nutrients exceed ``threshold``.

    The computation is positional and vectorised, so rows are flagged
    independently even when ``data`` has a non-unique index.

    Args:
        data: DataFrame with the eight population columns and ``production``.
            It is modified in place.
        necessary_nutrition: DataFrame with ``nutrient`` and
            ``daily_requirement`` columns.
        threshold: total shortfall above which a row counts as deficient.

    Returns:
        The same ``data`` object, with the ``nutrient_deficiency`` column set.
    """
    import numpy as np  # local import keeps this block self-contained

    age_gender_groups = {
        'children_male': ['young male crossbred (1000 number)', 'young male indigenous (1000 number)'],
        'children_female': ['young female crossbred (1000 number)', 'young female indigenous (1000 number)'],
        'adults_male': ['adult male crossbred (1000 number)', 'adult male indigenous (1000 number)'],
        'adults_female': ['adult female crossbred (1000 number)', 'adult female indigenous (1000 number)']
    }

    required_nutrition = necessary_nutrition.set_index('nutrient')['daily_requirement'].to_dict()

    # Head counts per group do not depend on the nutrient: compute them once.
    group_populations = [
        sum(data[col].to_numpy() for col in columns) * 1000  # thousands -> actual number
        for columns in age_gender_groups.values()
    ]
    production = data['production'].to_numpy() * 1000  # thousands -> actual number

    total_deficit = np.zeros(len(data))
    for daily_requirement in required_nutrition.values():
        total_requirement = sum(
            population * daily_requirement for population in group_populations
        )
        shortfall = total_requirement - production
        # Matches max(0, shortfall): negatives and NaN contribute nothing.
        total_deficit = total_deficit + np.where(shortfall > 0, shortfall, 0)

    # Mark deficiency if total deficit exceeds threshold
    data['nutrient_deficiency'] = (total_deficit > threshold).astype('int64')

    return data


In [24]:
from scipy.optimize import linprog

def optimize_crop_selection(data, demand):
    """Allocate ``demand`` across rows to maximise total profitability.

    Adds a ``profitability`` column (``production * average_price``) to
    ``data`` in place, then solves a linear program with one non-negative
    variable per row whose values must sum to ``demand``. The objective is
    the negated profitability, because ``linprog`` minimises.

    Returns:
        The ``scipy.optimize.linprog`` result object.
    """
    data['profitability'] = data['production'] * data['average_price']

    row_count = len(data)
    objective = -data['profitability'].values  # Negative for maximization

    return linprog(
        objective,
        A_eq=np.ones((1, row_count)),      # single equality row: allocations sum ...
        b_eq=[demand],                     # ... to the requested demand
        bounds=[(0, None)] * row_count,    # no negative allocation
        method='highs',
    )


In [25]:
def comprehensive_model(state, district):
    """Run the whole pipeline for one district and collect the outputs.

    Steps: load data, compute the district's dietary needs, gather nutrition
    info, compute the state's surplus/deficit table, recommend exchanges,
    train the production regressor and forecast 2025-2029, label and train
    the deficiency classifier, and finally solve the crop-selection LP for
    the district's residual demand.

    Args:
        state: state name (matched case-insensitively).
        district: district name (matched case-insensitively).

    Returns:
        dict: results keyed by human-readable section titles.

    Raises:
        ValueError: if no data exists for the given state and district.
    """
    # Preprocess Data
    merged_data, necessary_nutrition, food = preprocess_data()

    # Calculate Dietary Needs
    dietary_needs = calculate_dietary_needs(state, district, merged_data, necessary_nutrition)
    if not isinstance(dietary_needs, dict):
        # calculate_dietary_needs reports "no data" as a message string; fail
        # here instead of crashing later on `.items()` after the model training.
        raise ValueError(dietary_needs)

    # Define the required_nutrition dictionary
    required_nutrition = necessary_nutrition.set_index('nutrient')['daily_requirement'].to_dict()

    # Get Nutrition Info
    nutrition_info = get_nutrition_info(required_nutrition, necessary_nutrition, food)

    # Identify Surplus and Deficit Areas
    surplus_deficit_status = identify_surplus_deficit(state, merged_data, necessary_nutrition)

    # Recommend Food Exchanges. recommend_food_exchanges iterates its second
    # argument as {district: {nutrient: {group: need}}}, so it needs the needs
    # of every district in the state, not just the requested one.
    state_dietary_needs = {}
    for name in surplus_deficit_status:
        needs = calculate_dietary_needs(state, name, merged_data, necessary_nutrition)
        if isinstance(needs, dict):
            state_dietary_needs[name] = needs
    food_exchange_recommendations = recommend_food_exchanges(surplus_deficit_status, state_dietary_needs)

    # Train Crop Production Model with Prices Data
    features = ['area', 'actual rainfall', 'normal rainfall', 'crop_year', 'average_price']
    target = 'production'
    crop_production_model = train_crop_production_model(merged_data, features, target)

    # Future Crop Production and Price Forecast
    future_years = [2025, 2026, 2027, 2028, 2029]
    future_production_forecasts = []
    future_price_forecasts = []
    for year in future_years:
        future_data = pd.DataFrame({
            'area': [2000],
            'actual rainfall': [310],
            'normal rainfall': [250],
            'crop_year': [year],
            'average_price': [800]  # Example average price
        })
        future_production = crop_production_model.predict(future_data)
        future_price = 800 + (year - 2025) * 10  # Example linear price increase
        future_production_forecasts.append(future_production[0])
        future_price_forecasts.append(future_price)

    # Define and Train Nutrient Deficiency Model
    merged_data = define_nutrient_deficiency(merged_data, necessary_nutrition)
    target = 'nutrient_deficiency'
    nutrient_deficiency_model, scaler = train_nutrient_deficiency_model(merged_data, features, target)

    # Prediction example: uses the scenario built for the last forecast year.
    future_data_scaled = scaler.transform(future_data)
    deficiency_prediction = nutrient_deficiency_model.predict(future_data_scaled)

    # The status table is keyed by district names as spelled in the data, so
    # match case-insensitively; fall back to "no balance known".
    district_key = district.lower()
    district_status = next(
        (
            status
            for name, status in surplus_deficit_status.items()
            if str(name).lower() == district_key
        ),
        {},
    )

    # Calculate total demand: need minus the district's balance, per nutrient.
    # NOTE(review): this can be negative, which makes the LP below infeasible.
    total_demand = 0
    for nutrient, total_need_dict in dietary_needs.items():
        total_need = sum(total_need_dict.values())  # Sum the needs across all age groups
        total_surplus = district_status.get(nutrient, 0)
        total_demand += total_need - total_surplus

    # Optimize Crop Selection and Distribution for Profitability
    optimization_result = optimize_crop_selection(merged_data, total_demand)

    # Combine Results
    results = {
        'Nutrient Deficiencies': dietary_needs,
        'Food Recommendations': nutrition_info,
        'Surplus/Deficit Status': surplus_deficit_status,
        'Food Exchange Recommendations': food_exchange_recommendations,
        'Future Crop Production Forecasts': future_production_forecasts,
        'Future Crop Price Forecasts': future_price_forecasts,
        'Nutrient Deficiency Prediction': deficiency_prediction,
        'Optimization Result': optimization_result
    }

    return results


In [None]:
# Run the full pipeline for one state/district pair and print the combined
# result dictionary returned by comprehensive_model().
results = comprehensive_model('andhra pradesh', 'kurnool')
print(results)