In [87]:
import pandas as pd
import numpy as np

# Generate simulated dataset for farmers with trust categories
def generate_data(num_points):
    data = []
    for _ in range(num_points):
        data_point = {}

        # Simulate features
        data_point['Land Size (hectares)'] = np.random.uniform(0.5, 2.5)
        data_point['Farming Practices'] = np.random.choice(['Traditional', 'Organic', 'Hybrid', 'Modern'])
        data_point['Soil Quality'] = np.random.choice(['Low', 'Average', 'High'])
        data_point['Irrigation'] = np.random.choice(['Canal', 'Well', 'Sprinkler', 'Tubewell', 'Drip'])
        data_point['Yield (kg/hectare)'] = np.random.randint(3000, 8000)
        data_point['Education Level'] = np.random.choice(['Primary', 'Secondary', 'Higher Secondary'])
        data_point['Access to Healthcare'] = np.random.choice(['Poor', 'Average', 'Good'])
        data_point['Membership in Agricultural Cooperatives'] = np.random.choice(['No', 'Yes'])
        data_point['Land Ownership'] = np.random.choice(['Leased', 'Owned', 'Shared'])
        data_point['Access to Credit'] = np.random.choice(['Difficult', 'Moderate', 'Easy'])
        data_point['Crop Insurance Coverage'] = np.random.choice(['None', 'Partial', 'Full'])
        data_point['Annual Income from Agriculture'] = np.random.randint(100000, 1000000)
        data_point['Mobile Phone Ownership'] = np.random.choice(['None', 'Feature Phone', 'Smartphone'])
        data_point['Internet Access'] = np.random.choice(['No', 'Yes'])
        data_point['Usage of Agricultural Apps'] = np.random.choice(['None', 'Regular', 'Occasional'])
        data_point['Online Purchase of Agricultural Inputs'] = np.random.choice(['No', 'Yes'])

        # Add additional features
        data_point['Marital Status'] = np.random.choice(['Single', 'Married', 'Divorced'])
        data_point['Loan'] = np.random.choice(['No', 'Yes'])
        data_point['Overdue Debt Value'] = np.random.randint(0, 50000)
        data_point['Frequency of Entries to Fraud Sites'] = np.random.choice(['None', 'Low', 'Moderate', 'High'])
        data_point['Frequency of Entries to Gambling Sites'] = np.random.choice(['None', 'Low', 'Moderate', 'High'])
        data_point['Frequency of Entries to Disease Sites'] = np.random.choice(['None', 'Low', 'Moderate', 'High'])
        data_point['Frequency of Entries to Drug Abuse and Distribution Sites'] = np.random.choice(['None', 'Low', 'Moderate', 'High'])
        data_point['Gender'] = np.random.choice(['m','f'])
        data_point['Age'] = np.random.randint(18,50)

        # Scoring system based on features
        score = (
            data_point['Land Size (hectares)'] * 0.1 +
            (1 if data_point['Farming Practices'] == 'Organic' else 0) * 0.2 +
            (1 if data_point['Soil Quality'] == 'High' else 0) * 0.1 +
            (1 if data_point['Irrigation'] == 'Drip' else 0) * 0.2 +
            (data_point['Yield (kg/hectare)'] - 3000) / 5000 * 0.2 +
            (1 if data_point['Education Level'] == 'Higher Secondary' else 0) * 0.1 +
            (1 if data_point['Access to Healthcare'] == 'Good' else 0) * 0.1 +
            (1 if data_point['Membership in Agricultural Cooperatives'] == 'Yes' else 0) * 0.1 +
            (1 if data_point['Land Ownership'] == 'Owned' else 0) * 0.1 +
            (1 if data_point['Access to Credit'] == 'Easy' else 0) * 0.2 +
            (1 if data_point['Crop Insurance Coverage'] == 'Full' else 0) * 0.1 +
            (data_point['Annual Income from Agriculture'] - 1000000) / 1500000 * 0.2 +
            (1 if data_point['Mobile Phone Ownership'] == 'Smartphone' else 0) * 0.1 +
            (1 if data_point['Internet Access'] == 'Yes' else 0) * 0.1 +
            (1 if data_point['Usage of Agricultural Apps'] == 'Regular' else 0) * 0.1 +
            (1 if data_point['Online Purchase of Agricultural Inputs'] == 'Yes' else 0) * 0.1 -
            (data_point['Frequency of Entries to Fraud Sites'] == 'High') * 0.1 -
            (data_point['Frequency of Entries to Gambling Sites'] == 'High') * 0.1 -
            (data_point['Frequency of Entries to Disease Sites'] == 'High') * 0.1 -
            (data_point['Frequency of Entries to Drug Abuse and Distribution Sites'] == 'High') * 0.1
        )

        # Assign trust category based on the score
        if score > 0.7:
            data_point['Trust Category'] = 1
        elif score > 0.4:
            data_point['Trust Category'] = 2
        elif score > 0.1:
            data_point['Trust Category'] = 3
        else:
            data_point['Trust Category'] = 4

        data.append(data_point)

    return pd.DataFrame(data)

# Generate 100 data points
df = generate_data(1000)

# Print the DataFrame
print(df)


    Access to Credit Access to Healthcare  Age  \
0          Difficult                 Poor   41   
1          Difficult                 Poor   29   
2           Moderate                 Good   49   
3          Difficult                 Poor   24   
4               Easy                 Good   43   
5           Moderate                 Good   28   
6          Difficult                 Poor   42   
7           Moderate              Average   29   
8           Moderate              Average   38   
9               Easy                 Poor   47   
10          Moderate                 Good   30   
11          Moderate                 Good   26   
12         Difficult                 Poor   43   
13              Easy                 Poor   48   
14              Easy                 Good   36   
15              Easy                 Good   28   
16         Difficult                 Good   23   
17              Easy                 Poor   47   
18          Moderate                 Poor   27   


In [88]:
df.to_csv("Close_neighbour.csv")

In [58]:
len(df.columns)

26

In [89]:
df["Annual Income from Agriculture"]  = pd.to_numeric(df["Annual Income from Agriculture"])
df["Land Size (hectares)"]  = pd.to_numeric(df["Land Size (hectares)"])
df["Overdue Debt Value"]  = pd.to_numeric(df["Overdue Debt Value"])
df["Yield (kg/hectare)"]  = pd.to_numeric(df["Yield (kg/hectare)"])
df["Age"] = pd.to_numeric(df["Age"])

In [56]:
X.head()

Unnamed: 0,Access to Credit,Access to Healthcare,Age,Annual Income from Agriculture,Crop Insurance Coverage,Education Level,Farming Practices,Frequency of Entries to Disease Sites,Frequency of Entries to Drug Abuse and Distribution Sites,Frequency of Entries to Fraud Sites,...,Land Size (hectares),Loan,Marital Status,Membership in Agricultural Cooperatives,Mobile Phone Ownership,Online Purchase of Agricultural Inputs,Overdue Debt Value,Soil Quality,Usage of Agricultural Apps,Yield (kg/hectare)
0,0,1,36,2414697,1,1,2,3,1,2,...,1.28334,1,1,0,0,1,9110,0,1,5795
1,2,2,21,1425868,2,1,2,1,1,0,...,1.156735,0,1,1,0,1,15715,2,2,5239
2,2,2,34,3113201,2,1,1,1,3,0,...,2.225266,0,2,0,0,0,10818,0,1,7057
3,2,2,22,2425083,0,0,1,0,0,0,...,0.71427,1,1,1,1,0,22506,0,0,6328
4,0,1,44,2894647,1,1,2,2,1,3,...,2.026906,1,0,1,1,1,8432,1,1,6878


In [93]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# # Convert categorical columns to numerical using Label Encoding
le = LabelEncoder()

for column in df.columns:
    if df[column].dtype == 'object':
        df[column] = le.fit_transform(df[column])

# Extract features and target variable
X = df.drop(['Trust Category'], axis=1)  # Features
y = df['Trust Category']  # Target variable

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Create a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=250, random_state=42)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_classifier.predict(X_test)

# Evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_store_unique_indices = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.

Accuracy: 0.6272727272727273

Confusion Matrix:
[[ 79  52   0   0]
 [ 19 128   0   0]
 [  1  46   0   0]
 [  0   3   2   0]]

Classification Report:
              precision    recall  f1-score   support

           1       0.80      0.60      0.69       131
           2       0.56      0.87      0.68       147
           3       0.00      0.00      0.00        47
           4       0.00      0.00      0.00         5

   micro avg       0.63      0.63      0.63       330
   macro avg       0.34      0.37      0.34       330
weighted avg       0.57      0.63      0.58       330



Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=

In [94]:
import pandas as pd

# Simulate data for Mohan
mohan_data = {
    'Access to Credit': 'Moderate',  # Mohan has easy access to credit
    'Access to Healthcare': 'Good',
    'Age': np.random.randint(20, 50),  # Mohan's age
    'Annual Income from Agriculture': 419408.82,
    'Crop Insurance Coverage': 'Full',  # Mohan has full crop insurance coverage
    'Education Level': 'Higher Secondary',
    'Farming Practices': 'Traditional',  # Mohan practices organic farming
    'Frequency of Entries to Disease Sites': 'None',
    'Frequency of Entries to Drug Abuse and Distribution Sites': 'None',
    'Frequency of Entries to Fraud Sites': 'None',
    'Frequency of Entries to Gambling Sites': 'None',
    'Gender': 'Male',  # Mohan's gender
    'Internet Access': 'Yes',  # Mohan has internet access
    'Irrigation': 'Drip',  # Mohan uses drip irrigation
    'Land Ownership': 'Owned',  # Mohan owns his land
    'Land Size (hectares)': 1.22,
    'Loan': 'No',  # Mohan doesn't have a loan
    'Marital Status': 'Married',  # Mohan is married
    'Membership in Agricultural Cooperatives': 'Yes',  # Mohan is a member of a cooperative
    'Mobile Phone Ownership': 'Smartphone',  # Mohan owns a smartphone
    'Online Purchase of Agricultural Inputs': 'Yes',  # Mohan makes online purchases of agricultural inputs
    'Overdue Debt Value': 0,  # Mohan doesn't have overdue debt
    'Soil Quality': 'High',  # Mohan has high soil quality
    'Usage of Agricultural Apps': 'Regular',  # Mohan regularly uses agricultural apps
    'Yield (kg/hectare)': 7008,
}

# Create a DataFrame for Mohan
mohan_df = pd.DataFrame([mohan_data], columns=[
    'Access to Credit', 'Access to Healthcare', 'Age',
    'Annual Income from Agriculture', 'Crop Insurance Coverage',
    'Education Level', 'Farming Practices',
    'Frequency of Entries to Disease Sites',
    'Frequency of Entries to Drug Abuse and Distribution Sites',
    'Frequency of Entries to Fraud Sites',
    'Frequency of Entries to Gambling Sites', 'Gender', 'Internet Access',
    'Irrigation', 'Land Ownership', 'Land Size (hectares)', 'Loan',
    'Marital Status', 'Membership in Agricultural Cooperatives',
    'Mobile Phone Ownership', 'Online Purchase of Agricultural Inputs',
    'Overdue Debt Value', 'Soil Quality', 'Usage of Agricultural Apps',
    'Yield (kg/hectare)'
])

# Print Mohan's data
print(mohan_df)

  Access to Credit Access to Healthcare  Age  Annual Income from Agriculture  \
0         Moderate                 Good   42                       419408.82   

  Crop Insurance Coverage   Education Level Farming Practices  \
0                    Full  Higher Secondary       Traditional   

  Frequency of Entries to Disease Sites  \
0                                  None   

  Frequency of Entries to Drug Abuse and Distribution Sites  \
0                                               None          

  Frequency of Entries to Fraud Sites  ... Land Size (hectares) Loan  \
0                                None  ...                 1.22   No   

  Marital Status Membership in Agricultural Cooperatives  \
0        Married                                     Yes   

  Mobile Phone Ownership  Online Purchase of Agricultural Inputs  \
0             Smartphone                                     Yes   

  Overdue Debt Value Soil Quality Usage of Agricultural Apps  \
0                  0       

In [96]:
mohan_df.to_csv("mohan_CN.csv")

In [114]:
# Updated financial data for Mohan
financial_data = {
    'Assets_Real_Estate': np.random.randint(50000, 2000000),
    'Assets_Liquid_Cash': np.random.randint(5000, 50000),
    'Assets_Investments': np.random.randint(10000, 100000),
    'Liabilities_Loans': np.random.randint(0, 50000),
    'Liabilities_Credit_Card_Debt': 0,
    'Income_Salary': 0,
    'Income_Business': 0,
    'Income_Rent': np.random.randint(0, 2000),
    'Other_Income': np.random.randint(0, 10000),
}

# Additional information for Mohan
mohan_income_from_farming = 419408.82/12
mohan_family_size = 1 + 1 + 3

# Calculate Mohan's net annual income
monthly_income = financial_data['Income_Business'] + financial_data['Income_Rent'] + financial_data['Other_Income']
annual_income = monthly_income * 12 + mohan_income_from_farming
net_annual_income = annual_income - financial_data['Liabilities_Loans'] - financial_data['Liabilities_Credit_Card_Debt']

# Update financial data for Mohan
financial_data.update({'Income_Farming': mohan_income_from_farming, 'Net_Annual_Income': net_annual_income, 'Family_Size': mohan_family_size})

# Create a DataFrame for Mohan's updated financial data
mohan_df = pd.DataFrame([financial_data])

# Define weights for each financial category
weights = {
    'Assets_Real_Estate': 0.3,
    'Assets_Liquid_Cash': 0.2,
    'Assets_Investments': 0.2,
    'Liabilities_Loans': -0.2,
    'Liabilities_Credit_Card_Debt': -0.1,
    'Income_Salary': 0,
    'Income_Business': 0,
    'Income_Rent': 0.1,
    'Other_Income': 0.1,
    'Income_Farming': 0.25,
    'Net_Annual_Income': 0.2,
    'Family_Size': -0.15,
}

# Maximum possible values for scaling
max_values = {
    'Assets_Real_Estate': 2000000,
    'Assets_Liquid_Cash': 50000,
    'Assets_Investments': 100000,
    'Liabilities_Loans': 50000,
    'Income_Rent': 2000,
    'Other_Income': 10000,
    'Net_Annual_Income': 100000,
    'Family_Size': 10,
    'Income_Farming': 500000,
    
    
}

# Apply scaling to numerical columns
numerical_columns = ['Assets_Real_Estate', 'Assets_Liquid_Cash', 'Assets_Investments', 'Liabilities_Loans', 'Income_Rent', 'Other_Income','Net_Annual_Income','Family_Size','Income_Farming']
mohan_df[numerical_columns] = mohan_df[numerical_columns] / pd.Series(max_values)

# Calculate the financial score for Mohan
mohan_df['Financial_Score'] = sum(mohan_df[category] * weights[category] for category in weights.keys())

# Print Mohan's updated financial data and the calculated financial score
print(mohan_df)


   Assets_Investments  Assets_Liquid_Cash  Assets_Real_Estate  Family_Size  \
0             0.21675             0.68798            0.699518          0.5   

   Income_Business  Income_Farming  Income_Rent  Income_Salary  \
0                0        0.069901       0.2995              0   

   Liabilities_Credit_Card_Debt  Liabilities_Loans  Net_Annual_Income  \
0                             0            0.04174           1.386317   

   Other_Income  Financial_Score  
0        0.8215         0.714292  


In [115]:
mohan_df.to_csv("Asset_liability_mohan.csv")

In [86]:
print(financial_data)

{'Assets_Real_Estate': 1893305, 'Assets_Liquid_Cash': 48788, 'Assets_Investments': 64116, 'Liabilities_Loans': 19363, 'Liabilities_Credit_Card_Debt': 0, 'Income_Salary': 0, 'Income_Business': 0, 'Income_Rent': 1741, 'Other_Income': 3065, 'Income_Farming': 2359376.9166666665, 'Net_Annual_Income': 2397685.9166666665, 'Family_Size': 5}


In [72]:
missing_cols = set(X.columns) - set(mohan_df.columns)
for col in missing_cols:
    print(col)

In [95]:
mohan_df["Annual Income from Agriculture"]  = pd.to_numeric(mohan_df["Annual Income from Agriculture"])
mohan_df["Land Size (hectares)"]  = pd.to_numeric(mohan_df["Land Size (hectares)"])
mohan_df["Overdue Debt Value"]  = pd.to_numeric(mohan_df["Overdue Debt Value"])
mohan_df["Yield (kg/hectare)"]  = pd.to_numeric(mohan_df["Yield (kg/hectare)"])
mohan_df["Age"] = pd.to_numeric(mohan_df["Age"])

In [97]:
le1 = LabelEncoder()

for column in mohan_df.columns:
    if mohan_df[column].dtype == 'object':
        mohan_df[column] = le1.fit_transform(mohan_df[column])

In [98]:
yhat = rf_classifier.predict(mohan_df)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype=np.int)


In [99]:
yhat

array([2])

In [78]:
mohan_df.to_csv("mohan.csv")

In [117]:
# Sample data for a farmer
farmer_data = {
    'Financial_Score': 0.71,  # Assume a financial score between 0 and 1
    'Close_Neighbour_Category': 0.75,  # Assume a category value (1, 2, 3, etc.)
    'Expected_Annual_Income': 419408.82/480991.34,  # forecast annual income avg(10yrs)/max possible avf annual income(10yrs)
    'Agricultural_Efficiency': 0.87,  # Assume an efficiency score between 0 and 1
    'Farmer_Plot_Score': 0.92,  # Assume a plot score between 0 and 1
}

# Define weights for each factor
weights = {
    'Financial_Score': 0.3,
    'Close_Neighbour_Category': 0.2,
    'Expected_Annual_Income': 0.3,
    'Agricultural_Efficiency': 0.1,
    'Farmer_Plot_Score': 0.1,
}

# Calculate the Kisan Credit Score using weighted average
kisan_credit_score = sum(farmer_data[feature] * weights[feature] for feature in weights.keys())

# Print the Kisan Credit Score
print("Kisan Credit Score:", kisan_credit_score)


Kisan Credit Score: 0.8035902523317778


In [116]:
import numpy as np

# Simulate data for Mohan's farmer plot with numerical scores
plot_data = {
    'Land Size (hectares)': 1.22,
    'Soil Quality': 0.9,  # Assume Mohan has high soil quality (scored 0.9 out of 1)
    'Irrigation': 0.8,  # Assume Mohan uses drip irrigation (scored 0.8 out of 1)
    'Farming Practices': 0.7,  # Assume Mohan practices organic farming (scored 0.7 out of 1)
}

# Define weights for each factor
plot_weights = {
    'Land Size (hectares)': 0.25,
    'Soil Quality': 0.35,
    'Irrigation': 0.25,
    'Farming Practices': 0.15,
}

# Calculate the plot score using weighted average
total_plot_weight = sum(plot_weights.values())
plot_score = sum(plot_data[feature] * plot_weights[feature] for feature in plot_weights.keys()) / total_plot_weight

# Print Mohan's farmer plot data and plot score
print("Mohan's Farmer Plot Data:")
print(plot_data)
print("\nPlot Score:", plot_score)

Mohan's Farmer Plot Data:
{'Land Size (hectares)': 1.22, 'Soil Quality': 0.9, 'Irrigation': 0.8, 'Farming Practices': 0.7}

Plot Score: 0.925
