In [None]:
!pip install python-dotenv pandas numpy openpyxl sqlalchemy

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

DB_HOST = os.getenv("SANDHYA_ERP_DB_HOST")
DB_PORT = os.getenv("SANDHYA_ERP_DB_PORT")

DB_USERNAME = os.getenv("SANDHYA_ERP_DB_USERNAME")
DB_PASSWORD = os.getenv("SANDHYA_ERP_DB_PASSWORD")
DB_NAME = os.getenv("SANDHYA_ERP_DB_NAME")

sandhya_erp_db_url = (
    f"mysql+pymysql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

In [2]:
import pandas as pd
import sqlalchemy

# show all rows and columns
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [3]:
engine = sqlalchemy.create_engine(sandhya_erp_db_url)

In [4]:
supplier_df = pd.read_sql("SELECT * FROM erpx_dev.suppliers", engine)
supplier_df.head()

Unnamed: 0,supplier_id,code,name,supplier_category,gst_number,broker_id,contact_person,street,apartment,city,state,district,postal_code,commission,bank_acc_no,bank_ifsc,bank_name,bank_branch,contact_number,email_id,is_active,created_at,created_user,updated_at,updated_user,version_flag,payment_period
0,1,A1,Karuna Raju,Agent,,0,Gopi Raju,,,,ap,Ongole,123456,2.0,,,,,,,1,2023-01-20 17:18:17.358860,,2023-01-23 15:24:21.920682,,1,
1,2,A2,Sri Lakshmi Aqua Farms,Agent,,0,K Vijay,,,,ap,bhVEM,123456,2.0,,,,,,,1,2023-01-21 15:59:41.769207,,2023-01-23 15:24:27.197321,,1,
2,3,A3,Sri Venkateswara Traders,Agent,,0,SVT,,,,ap,bhVEM,123456,2.0,,,,,,,1,2023-01-21 16:19:28.500526,,2023-01-23 15:24:30.204286,,1,
3,4,A4,Varalakshmi Agencies,Agent,,0,Rakesh,,,,ap,KK,123456,1.0,,,,,,,1,2023-01-23 14:39:43.386196,,2023-01-23 15:24:34.793301,,1,
4,7,A5,Sri Lakshmi Aqua Farms,Agent,,0,Rakesh,,,,AP,KK,123456,1.0,,,,,,,0,2023-01-23 17:33:11.528853,,2023-01-24 19:23:35.251509,,2,


In [5]:
supplier_df.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
supplier_id,572.0,288.486014,1.0,145.75,288.5,431.25,574.0,165.29061
broker_id,572.0,0.215035,0.0,0.0,0.0,0.0,123.0,5.142888
commission,567.0,920.155203,1.0,1.0,1.0,1.0,521157.0,21886.494395
is_active,572.0,0.977273,0.0,1.0,1.0,1.0,1.0,0.149163
created_at,572.0,2023-08-27 04:53:19.229318400,2023-01-20 17:18:17.358860,2023-02-25 11:28:33.746909440,2023-06-01 14:03:45.611426048,2023-11-17 12:35:04.927335936,2025-08-12 12:22:13.410409,
updated_at,572.0,2023-09-01 05:44:39.985777152,2023-01-23 15:24:21.920682,2023-02-25 15:49:37.842742528,2023-06-03 04:25:53.728720384,2023-11-28 12:55:13.254458880,2025-08-12 12:22:13.410409,
version_flag,572.0,1.083916,1.0,1.0,1.0,1.0,4.0,0.307444


No significant numerical data in erpx_dev.suppliers table

In [6]:
# supplier with max commission

supplier_df[supplier_df["commission"] == supplier_df["commission"].max()]

Unnamed: 0,supplier_id,code,name,supplier_category,gst_number,broker_id,contact_person,street,apartment,city,state,district,postal_code,commission,bank_acc_no,bank_ifsc,bank_name,bank_branch,contact_number,email_id,is_active,created_at,created_user,updated_at,updated_user,version_flag,payment_period
489,492,A481,SUMUKHA TRADERS,Agent,,0,PARASA,,,,AP,KRISHNA,,521157.0,,,,,,,1,2024-07-08 17:07:10.619320,,2024-07-08 17:07:10.619320,,1,


In [7]:
supplier_df.nunique()

supplier_id          572
code                 572
name                 556
supplier_category      3
gst_number             1
broker_id              2
contact_person        11
street                 1
apartment              0
city                   1
state                  5
district               7
postal_code            7
commission             3
bank_acc_no            0
bank_ifsc              0
bank_name              0
bank_branch            0
contact_number         1
email_id               0
is_active              2
created_at           572
created_user           0
updated_at           572
updated_user           0
version_flag           4
payment_period         0
dtype: int64

In [8]:
supplier_df["supplier_category"].value_counts()

supplier_category
Agent     561
Farmer     10
Broker      1
Name: count, dtype: int64

Among 572 suppliers, 561 are agents, which is almost all of the suppliers. 

In [9]:
supplier_df[supplier_df["supplier_category"] == "Broker"]

Unnamed: 0,supplier_id,code,name,supplier_category,gst_number,broker_id,contact_person,street,apartment,city,state,district,postal_code,commission,bank_acc_no,bank_ifsc,bank_name,bank_branch,contact_number,email_id,is_active,created_at,created_user,updated_at,updated_user,version_flag,payment_period
120,123,B1,asdff,Broker,,0,,,,,,,,,,,,,,,1,2023-02-21 16:24:21.113710,,2023-02-21 16:24:21.113710,,1,


In [10]:
supplier_df[supplier_df["supplier_category"] == "Farmer"]

Unnamed: 0,supplier_id,code,name,supplier_category,gst_number,broker_id,contact_person,street,apartment,city,state,district,postal_code,commission,bank_acc_no,bank_ifsc,bank_name,bank_branch,contact_number,email_id,is_active,created_at,created_user,updated_at,updated_user,version_flag,payment_period
5,8,F1,K Sriu,Farmer,,0,Rakesh,,,,AP,KK,123456.0,1.0,,,,,,,1,2023-01-24 11:08:26.341499,,2023-01-24 11:08:26.341499,,1,
6,9,F2,srinivasarao k,Farmer,,0,sravani,,,,AP,Krishna,521157.0,,,,,,,,1,2023-01-24 11:36:09.012417,,2023-01-24 19:29:02.666640,,1,
21,24,F3,rahul,Farmer,GST33654789,0,,,,,,,,,,,,,,,1,2023-01-24 19:38:08.526412,,2023-01-24 19:38:08.526412,,1,
83,86,F4,madhav,Farmer,,123,,,,,,,,,,,,,,,1,2023-02-08 18:23:33.541614,,2023-02-21 16:24:21.000000,,2,
234,237,A230,SANDHYA AQUA FARMS KRITHIVENNU OWN,Farmer,,0,,,,,,,,1.0,,,,,,,1,2023-04-21 10:41:13.773245,,2023-08-29 17:01:45.000000,,2,
359,362,F5,V DURGA RAO,Farmer,,0,SRAVANI,,,,,KRISHNA,521157.0,1.0,,,,,,,1,2023-08-01 10:20:03.658414,,2023-08-01 10:20:03.658414,,1,
414,417,F6,SANDHYA AQUA EXPORTS PVT LTD FARM BEERAMGUNTA,Farmer,,0,KAVITHA,,,,AP,KRISHANA,521157.0,1.0,,,,,,,1,2023-10-21 14:12:00.607933,,2023-10-21 14:12:00.607933,,1,
437,440,F7,SANDHYA AQUA SRI FARMS,Farmer,,0,KAVITHA,,,,AP,KRISHNA,521157.0,1.0,,,,,,,1,2023-12-12 11:09:54.815987,,2023-12-12 11:09:54.815987,,1,
487,490,F8,GUDURU OWN PONDS,Farmer,,0,PARASA,,,,AP,KRISHNA,521157.0,1.0,,,,,,,1,2024-06-27 12:43:06.874291,,2024-06-27 12:43:06.874291,,1,
561,564,F9,x,Farmer,,0,,,,,,,,,,,,,,,1,2025-06-18 13:06:43.879680,,2025-06-18 13:06:43.879680,,1,


In [11]:
indent_df = pd.read_sql("SELECT * FROM erpx_dev_rm_procurement.indent", engine)
indent_df.head()

Unnamed: 0,is_active,created_at,created_user,updated_at,updated_user,version_flag,indent_id,indent_code,indent_date,supplier_type,commission_type,commission,harvesting_date,harvesting_time,assign_lead_1,assign_lead_2,vehicle_assigned,grn_completed,plant_id,indent_by,is_vehicle_required,harvest_type
0,1,2023-01-20 18:36:47.331519,unitone,2023-01-23 15:43:48,unitone,6,3,I24/A20/0001,2023-01-20 13:05:15,Agent,Percentage,0,2023-01-21 13:05:15,Morning,1809,,0,1,1,2178,0,Re Weighment
1,1,2023-01-20 18:48:41.656502,unitone,2023-01-20 18:49:11,unitone,3,4,I24/A20/0002,2023-01-20 13:18:12,Agent,Percentage,0,2023-01-21 13:18:12,Morning,1809,,0,1,2,1810,0,Re Weighment
2,1,2023-01-21 16:02:02.897476,superadmin,2023-01-21 16:09:08,superadmin,5,5,I24/A21/0003,2023-01-21 10:29:58,Agent,Percentage,0,2023-01-22 10:29:58,Morning,1809,,0,1,2,2178,0,Live Harvest
3,1,2023-01-21 16:21:02.870258,superadmin,2023-01-23 10:30:51,superadmin,5,6,I24/A21/0004,2023-01-21 10:49:33,Agent,Percentage,0,2023-01-22 10:49:33,Morning,1809,,0,1,1,2178,0,Re Weighment
4,1,2023-01-23 14:42:02.614058,unittwo,2023-01-23 14:48:31,unittwo,6,7,I24/A23/0005,2023-01-23 09:09:50,Agent,Percentage,0,2023-01-24 09:09:50,Morning,1809,,0,1,3,1809,0,Live Harvest


In [12]:
indent_items_df = pd.read_sql(
    "SELECT * FROM erpx_dev_rm_procurement.indent_items", engine
)
indent_items_df.head()

Unnamed: 0,indent_item_id,broker,farmer,farmer_code,product,expected_count,expected_qty,expected_price,expected_count2,expected_price2,expected_count3,expected_price3,is_active,created_at,created_user,updated_at,updated_user,version_flag,indent_id,harvest_report_id
0,2,0,1,,2,97,1500,237,100.0,225.0,,,1,2023-01-20 18:36:47.337082,Admin,2023-01-20 18:37:04,,2,3,0
1,3,0,1,,2,45,1000,350,,,,,1,2023-01-20 18:48:41.662178,Admin,2023-01-20 18:48:52,,2,4,0
2,4,0,2,,2,25,5000,300,,,,,1,2023-01-21 16:02:02.904148,Admin,2023-01-21 16:02:11,,2,5,0
3,5,0,3,,2,55,3500,275,57.0,270.0,,,1,2023-01-21 16:21:02.877064,Admin,2023-01-21 16:21:33,,2,6,0
4,6,0,4,,2,70,2500,290,75.0,286.0,,,1,2023-01-23 14:42:02.627996,Admin,2023-01-23 14:42:18,,2,7,0


In [13]:
indent_items_df.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
indent_item_id,14639.0,7321.0,2.0,3661.5,7321.0,10980.5,14640.0,4226.05963
broker,14639.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
farmer,14639.0,178.764943,1.0,39.0,133.0,284.0,574.0,159.056061
farmer_code,1.0,1.0,1.0,1.0,1.0,1.0,1.0,
product,14639.0,1.952183,1.0,2.0,2.0,2.0,3.0,0.214026
expected_count,14639.0,57.208757,1.0,35.0,55.0,75.0,1800.0,30.719453
expected_qty,14639.0,2839.682355,10.0,1100.0,2000.0,3700.0,36000.0,2557.56086
expected_price,14639.0,316.733042,0.0,265.0,300.0,350.0,8000.0,112.566047
expected_count2,12.0,152.083333,34.0,52.75,105.0,235.0,345.0,117.089289
expected_price2,8.0,337.375,90.0,258.75,274.0,281.5,1000.0,275.533217


In [14]:
indent_items_df.nunique()

indent_item_id       14639
broker                   1
farmer                 551
farmer_code              1
product                  3
expected_count         115
expected_qty           187
expected_price         173
expected_count2         11
expected_price2          7
expected_count3          0
expected_price3          0
is_active                1
created_at           14639
created_user             1
updated_at           11613
updated_user             0
version_flag            10
indent_id             8200
harvest_report_id        1
dtype: int64

In [15]:
grn_df = pd.read_sql("SELECT * FROM erpx_dev_rm_procurement.grn", engine)
grn_df.head()

Unnamed: 0,grn_id,grn_date,grn_number,supplier_type,is_active,created_at,created_user,updated_at,updated_user,version_flag,indent_no,plant_id,sub_plant_id
0,10,2023-01-20 18:42:08,G24/KRN/null/0001,Agent,1,2023-01-20 18:43:15.625959,unitone,2023-01-23 12:11:13.000000,unitone,4,3,2,6.0
1,11,2023-01-20 18:48:55,G24/KRN/null/0002,Agent,1,2023-01-20 18:49:11.632700,unitone,2023-03-30 14:26:57.151334,,1,4,0,
2,12,2023-01-20 18:54:14,G24/KRN/null/0003,Agent,1,2023-01-20 18:54:36.696090,unitone,2023-01-23 15:43:48.000000,admin,2,3,1,
3,13,2023-01-21 16:02:15,G24/123/null/0004,Agent,1,2023-01-21 16:06:28.127043,superadmin,2023-01-21 16:09:34.000000,superadmin,4,5,2,4.0
4,14,2023-01-21 16:21:36,G24/SVT/null/0005,Agent,1,2023-01-21 16:23:02.922709,superadmin,2023-01-23 10:30:51.000000,admin,4,6,1,8.0


In [16]:
grn_items_df = pd.read_sql("SELECT * FROM erpx_dev_rm_procurement.grn_items", engine)
grn_items_df.head()

Unnamed: 0,grn_item_id,grn_id,indent_id,lot_number,sale_order_id,sale_order_item_id,farmer,farmer_code,quantity,count,price,received_boxes,soft_percentage,boxes,is_active,assign_to_rm,created_at,created_user,updated_at,updated_user,version_flag,indent_item_id,product,broker,vi_inspection,rm_inspection,antibiotic_test,actual_quantity,actual_count,loose_shell,fungus_percentage,ice_boxes,empty_boxes,rm_boxes,seal_no,remarks,plant_lot_number,ref_no
0,1,10,3,LOT-U1/24/1/0001,,,1,,1000,55,50000.0,120.0,2.0,120,1,1,2023-01-20 18:43:15.631362,unitone,2023-01-23 12:11:18,unitone,3,2,2,0,0,0,0,,,3.0,1.0,,,,,,,
1,2,11,4,LOT-U1/24/1/0049,,,1,,1000,50,,0.0,,150,1,1,2023-01-20 18:49:11.637446,unitone,2023-01-25 12:04:13,,2,3,2,0,0,0,0,,,,,,,,,,,
2,3,12,3,,,,1,,1490,97,5200.0,48.0,0.0,48,1,0,2023-01-20 18:54:36.702904,unitone,2023-01-23 15:43:48,admin,2,2,2,0,0,0,0,,,0.0,0.0,,,,,,,
3,4,13,5,LOT-U1/24/2/0416,,,2,,2606,31,290.0,87.0,0.0,87,1,1,2023-01-21 16:06:28.135154,superadmin,2023-02-08 15:42:25,superadmin,4,4,4,0,0,0,0,,,0.0,0.0,,,,,,,
4,5,14,6,LOT-KKD/24/1/0002,,,3,,3264,57,270.0,116.0,0.0,116,1,1,2023-01-21 16:23:02.927747,superadmin,2023-01-23 12:24:27,admin,4,5,5,0,0,0,0,,,0.0,0.0,,,,,,,


In [17]:
grn_items_df.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
grn_item_id,14564.0,7427.173647,1.0,3799.75,7440.5,11081.25,14722.0,4225.631215
grn_id,14564.0,4269.286116,10.0,2242.75,4386.5,6313.25,8354.0,2375.936839
indent_id,14564.0,4111.924746,3.0,2057.75,4200.5,6159.25,8202.0,2368.398727
sale_order_id,12570.0,2965.171997,220.0,1927.0,2558.0,3948.0,6394.0,1387.036262
sale_order_item_id,12570.0,10932.033413,358.0,8397.0,10295.0,14045.0,17956.0,3772.108139
farmer,14564.0,178.641994,1.0,39.0,133.0,284.0,574.0,159.153956
farmer_code,1.0,1.0,1.0,1.0,1.0,1.0,1.0,
quantity,14564.0,2822.714227,4.0,1149.0,2138.0,3668.0,31870.0,2538.579961
count,14564.0,60.093244,5.0,40.0,54.0,72.0,4850.0,48.74078
price,269.0,517.925651,103.0,278.0,312.0,350.0,50000.0,3044.748637


In [18]:
grn_items_df["plant_lot_number"].nunique()

12959

In [19]:
grn_items_df.nunique()

grn_item_id           14564
grn_id                 8169
indent_id              8165
lot_number            13466
sale_order_id           693
sale_order_item_id      801
farmer                  551
farmer_code               1
quantity               5967
count                   175
price                   101
received_boxes          484
soft_percentage          12
boxes                   483
is_active                 1
assign_to_rm              2
created_at            14564
created_user             23
updated_at             2352
updated_user             21
version_flag             17
indent_item_id        14074
product               12072
broker                    1
vi_inspection             2
rm_inspection             1
antibiotic_test           2
actual_quantity           0
actual_count              0
loose_shell              20
fungus_percentage         6
ice_boxes                15
empty_boxes              25
rm_boxes                 52
seal_no                  38
remarks             

In [20]:
query = """
    SELECT

    -- Farmer Information
    ii.farmer,
    s.name AS farmer_name,

    -- Time to GRN
    AVG(TIMESTAMPDIFF(HOUR, i.indent_date, g.grn_date)) AS avg_time_to_grn,

    -- Average Expected (Planned) Values from Indent
    SUM(ii.expected_qty) AS total_expected_qty,                                 -- SUMMING ALL EXPECTED QUANITTY FROM FARMER RATHER TO AVERAGING
    AVG(ii.expected_price) AS avg_expected_price,
    -- AVG(ii.expected_count) AS avg_expected_count, 

    -- Average Actual Received Values from GRN
    SUM(gi.quantity) AS total_actual_quantity,                           -- SUMMING ALL ACTUAL QUANITTY FROM FARMER RATHER TO AVERAGING   
    AVG(gi.price) AS avg_actual_price,
    -- AVG(gi.actual_count) AS avg_actual_count,
    AVG(gi.received_boxes) AS avg_received_boxes,

    -- Key Quality & Performance Metrics
    AVG(ABS(gi.quantity - ii.expected_qty)) AS avg_quantity_variance,
    AVG(gi.price - ii.expected_price) AS avg_price_variance,
    AVG(ABS(gi.count - ii.expected_count)) AS avg_expected_count_variance,
    

    -- Identifiers for Traceability
    GROUP_CONCAT(DISTINCT i.indent_id) AS indent_ids,
    GROUP_CONCAT(DISTINCT ii.indent_item_id) AS indent_item_ids,
    GROUP_CONCAT(DISTINCT g.grn_id) AS grn_ids,
    GROUP_CONCAT(DISTINCT gi.grn_item_id) AS grn_item_ids,
    
    COUNT(DISTINCT i.indent_id) AS indent_count,
    COUNT(DISTINCT g.grn_id) AS grn_count,
    COUNT(DISTINCT gi.indent_item_id) AS indent_item_count,
    COUNT(DISTINCT gi.grn_item_id) AS grn_item_count,

    COUNT(DISTINCT gi.plant_lot_number) AS lots_supplied

FROM
    erpx_dev_rm_procurement.indent_items AS ii
JOIN
    erpx_dev_rm_procurement.indent AS i ON ii.indent_id = i.indent_id
LEFT JOIN
    erpx_dev_rm_procurement.grn AS g ON g.indent_no = i.indent_id
LEFT JOIN 
    erpx_dev_rm_procurement.grn_items AS gi ON gi.grn_id = g.grn_id
LEFT JOIN
    erpx_dev.suppliers AS s ON s.supplier_id = ii.farmer
GROUP BY
    ii.farmer
ORDER BY
    ii.farmer;
"""

test_df = pd.read_sql(query, engine)
test_df.head()

Unnamed: 0,farmer,farmer_name,avg_time_to_grn,total_expected_qty,avg_expected_price,total_actual_quantity,avg_actual_price,avg_received_boxes,avg_quantity_variance,avg_price_variance,avg_expected_count_variance,indent_ids,indent_item_ids,grn_ids,grn_item_ids,indent_count,grn_count,indent_item_count,grn_item_count,lots_supplied
0,1,Karuna Raju,302.8,21347.0,301.4,23530.0,27600.0,84.5556,421.3,27363.0,7.3,34956571399267146377297,2310989924594772827413100,10111217111912431467284147867447,123101821202524204908834513178,9,10,9,10,5
1,2,Sri Lakshmi Aqua Farms,33.7,43700.0,293.8,35934.0,278.0,122.0,1389.8,-3.6,4.5,51016037529,411122832283313486,131817887679,411122987298813564,4,4,6,6,3
2,3,Sri Venkateswara Traders,5.0,3500.0,275.0,3264.0,270.0,116.0,236.0,-5.0,2.0,6,5,14,5,1,1,1,1,0
3,4,Varalakshmi Agencies,128.7684,367500.0,319.5526,336530.0,312.5839,69.5122,852.7263,5.7591,9.6895,"7,8,15,17,51,53,58,84,97,113,128,141,154,181,2...","6,7,8,9,20,21,22,23,24,26,27,88,89,90,92,93,94...","15,16,23,25,78,80,83,113,127,143,158,170,184,2...","6,7,8,9,20,21,22,23,24,26,27,74,75,76,77,79,80...",16,16,52,52,2
4,8,K Sriu,3.3333,10500.0,286.6667,10072.0,287.5,60.8,343.6667,7.5,5.3333,1175727911,13141356114169,1977228063,13141363814251,3,3,4,4,2


In [21]:
test_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
farmer,551.0,291.119782,165.036096,1.0,152.5,294.0,433.0,574.0
avg_time_to_grn,551.0,99.121575,378.688176,3.3333,54.36515,70.8868,91.97325,8832.0
total_expected_qty,551.0,143334.462795,401387.713802,140.0,7100.0,23000.0,86240.0,3418625.0
avg_expected_price,551.0,329.605781,77.059775,0.0,284.475,314.0,359.0566,900.0
total_actual_quantity,551.0,141723.716878,400490.202795,118.0,6892.0,22867.0,79593.0,3475924.0
avg_actual_price,61.0,761.697493,3493.980183,205.0,285.0,305.3333,342.0,27600.0
avg_received_boxes,532.0,89.417331,63.014067,3.0,52.865475,75.0,104.216825,492.0
avg_quantity_variance,551.0,539.97385,482.34706,0.0,191.8333,427.75,765.34615,3708.111
avg_price_variance,61.0,455.398266,3502.654997,-39.0,-1.0,5.0,10.0,27363.0
avg_expected_count_variance,551.0,10.187644,36.740285,0.0,5.0,7.1552,10.92185,857.5


In [22]:
# Drop avg_actual_price and avg_price_variance due to low volume of data
dataset = test_df.drop(
    columns=[
        "avg_actual_price",
        "avg_price_variance",
        "grn_ids",
        "grn_item_ids",
        "indent_item_ids",
        "indent_ids",
        "lots_supplied",
    ]
)

In [23]:
dataset.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
farmer,551.0,291.119782,165.036096,1.0,152.5,294.0,433.0,574.0
avg_time_to_grn,551.0,99.121575,378.688176,3.3333,54.36515,70.8868,91.97325,8832.0
total_expected_qty,551.0,143334.462795,401387.713802,140.0,7100.0,23000.0,86240.0,3418625.0
avg_expected_price,551.0,329.605781,77.059775,0.0,284.475,314.0,359.0566,900.0
total_actual_quantity,551.0,141723.716878,400490.202795,118.0,6892.0,22867.0,79593.0,3475924.0
avg_received_boxes,532.0,89.417331,63.014067,3.0,52.865475,75.0,104.216825,492.0
avg_quantity_variance,551.0,539.97385,482.34706,0.0,191.8333,427.75,765.34615,3708.111
avg_expected_count_variance,551.0,10.187644,36.740285,0.0,5.0,7.1552,10.92185,857.5
indent_count,551.0,14.940109,41.28405,1.0,1.0,3.0,9.0,473.0
grn_count,551.0,14.883848,41.185668,1.0,1.0,3.0,9.0,473.0


Performing TOPSIS method to rank the farmer performance
1. Normalize the data
2. Calculate the weighted normalized decision matrix
3. Calculate the separation measures
4. Calculate the relative closeness to the ideal solution
5. Rank the farmers based on the relative closeness to the ideal solution



In [24]:
# Normalizing the data and normalizing if we have weights
def normalize_data(dataset, nCol, weights=None):
    for i in range(1, nCol):
        temp = 0

        for j in range(len(dataset)):
            temp += dataset.iloc[j, i] ** 2
        temp = temp**0.5

        for j in range(len(dataset)):
            if weights is not None:
                dataset.iloc[j, i] = (dataset.iloc[j, i] / temp) * weights[i - 1]
            else:
                dataset.iloc[j, i] = dataset.iloc[j, i] / temp

    return dataset


def calculate_ideal_values(dataset, nCol, impact):
    ideal_best = []
    ideal_worst = []

    for i in range(1, nCol):
        if impact[i - 1] == "+":
            ideal_best.append(dataset.iloc[:, i].max())
            ideal_worst.append(dataset.iloc[:, i].min())
        else:
            ideal_best.append(dataset.iloc[:, i].min())
            ideal_worst.append(dataset.iloc[:, i].max())

    return ideal_best, ideal_worst


def calculate_separation_measures(dataset, ideal_best, ideal_worst, nCol):
    separation_best = []
    separation_worst = []

    for i in range(len(dataset)):
        best = 0
        worst = 0

        for j in range(1, nCol):
            best += (dataset.iloc[i, j] - ideal_best[j - 1]) ** 2
            worst += (dataset.iloc[i, j] - ideal_worst[j - 1]) ** 2

        separation_best.append(best**0.5)
        separation_worst.append(worst**0.5)

    return separation_best, separation_worst


def calculate_relative_closeness(separation_best, separation_worst):
    relative_closeness = []

    for i in range(len(separation_best)):
        if separation_best[i] + separation_worst[i] == 0:
            relative_closeness.append(0)
        else:
            relative_closeness.append(
                separation_worst[i] / (separation_best[i] + separation_worst[i])
            )

    return relative_closeness


def rank_farmers(dataset, nCol, impact, weights=None):
    normalized_data = normalize_data(dataset.copy(), nCol, weights)
    ideal_best, ideal_worst = calculate_ideal_values(normalized_data, nCol, impact)
    separation_best, separation_worst = calculate_separation_measures(
        normalized_data, ideal_best, ideal_worst, nCol
    )
    relative_closeness = calculate_relative_closeness(separation_best, separation_worst)
    normalized_data["relative_closeness"] = relative_closeness
    normalized_data["rank"] = normalized_data["relative_closeness"].rank(
        ascending=False
    )

    return normalized_data.sort_values(by="rank")


numeric_columns_impact_mapper = {
    "avg_time_to_grn": "-",
    "total_expected_qty": "+",
    "avg_expected_price": "-",
    "total_actual_quantity": "+",
    "avg_received_boxes": "+",
    "avg_quantity_variance": "-",
    "avg_expected_count_variance": "-",
    "indent_count": "+",
    "grn_count": "+",
    "indent_item_count": "+",
    "grn_item_count": "+",
}

farmer_ids = dataset[["farmer", "farmer_name"]].copy()
numeric_dataset = dataset[
    ["farmer"] + list(numeric_columns_impact_mapper.keys())
].copy()

numeric_dataset = numeric_dataset.fillna(numeric_dataset.mean())

nCol = numeric_dataset.shape[1]

impact = list(numeric_columns_impact_mapper.values())  # Adjusted for 10 numeric columns

equal_weight_factor = 1 / (nCol - 1)  # Equal weight factor for normalization

weights = [equal_weight_factor] * (nCol - 1)

ranked_farmers = rank_farmers(numeric_dataset, nCol, impact, weights)

# Merge with original dataset to get all columns, then add ranking information
final_results = dataset.merge(
    ranked_farmers[["farmer", "relative_closeness", "rank"]], on="farmer", how="left"
)
final_results = final_results.sort_values(by="rank")
final_results.head()

  dataset.iloc[j, i] = (dataset.iloc[j, i] / temp) * weights[i-1]
  dataset.iloc[j, i] = (dataset.iloc[j, i] / temp) * weights[i-1]
  dataset.iloc[j, i] = (dataset.iloc[j, i] / temp) * weights[i-1]
  dataset.iloc[j, i] = (dataset.iloc[j, i] / temp) * weights[i-1]


Unnamed: 0,farmer,farmer_name,avg_time_to_grn,total_expected_qty,avg_expected_price,total_actual_quantity,avg_received_boxes,avg_quantity_variance,avg_expected_count_variance,indent_count,grn_count,indent_item_count,grn_item_count,relative_closeness,rank
17,21,KS RAJU,74.3528,3391850.0,289.809,3463088.0,133.9238,1169.1873,11.3017,473,473,563,576,0.910612,1.0
154,169,RAMALINGARAJU,200.7969,3106274.0,306.6151,3125085.0,97.1199,648.4749,8.5735,400,400,556,556,0.887891,2.0
15,19,SRINIVASARAO K,77.4832,3418625.0,312.4812,3475924.0,94.8579,1732.869,17.1245,361,360,582,592,0.866155,3.0
14,18,K SRINU,107.8041,2036650.0,309.8884,1922421.0,77.6538,1399.0011,18.213,239,239,386,406,0.760932,4.0
36,40,SRI RAMA AQUA,81.9508,2539562.0,310.8431,2517238.0,70.515,678.9086,10.884,178,177,341,343,0.742269,5.0


In [25]:
final_results.head(50)

Unnamed: 0,farmer,farmer_name,avg_time_to_grn,total_expected_qty,avg_expected_price,total_actual_quantity,avg_received_boxes,avg_quantity_variance,avg_expected_count_variance,indent_count,grn_count,indent_item_count,grn_item_count,relative_closeness,rank
17,21,KS RAJU,74.3528,3391850.0,289.809,3463088.0,133.9238,1169.1873,11.3017,473,473,563,576,0.910612,1.0
154,169,RAMALINGARAJU,200.7969,3106274.0,306.6151,3125085.0,97.1199,648.4749,8.5735,400,400,556,556,0.887891,2.0
15,19,SRINIVASARAO K,77.4832,3418625.0,312.4812,3475924.0,94.8579,1732.869,17.1245,361,360,582,592,0.866155,3.0
14,18,K SRINU,107.8041,2036650.0,309.8884,1922421.0,77.6538,1399.0011,18.213,239,239,386,406,0.760932,4.0
36,40,SRI RAMA AQUA,81.9508,2539562.0,310.8431,2517238.0,70.515,678.9086,10.884,178,177,341,343,0.742269,5.0
388,407,SHREEYAN MARINE EXPORTS PVT LTD,53.4301,2701530.0,304.5837,2606042.0,71.5909,942.7154,18.122,147,147,384,384,0.735012,6.0
49,53,NAGESH VARMA,80.6364,1872480.0,298.5796,1879254.0,113.7253,995.0241,10.1429,195,194,290,291,0.728088,7.0
265,284,MASTAN SKM,62.6291,2364360.0,249.4539,2345075.0,72.9059,1017.0801,16.1566,145,145,343,343,0.723638,8.0
35,39,CHANDRIKA SEAFOODS,71.1003,1634550.0,295.6354,1614700.0,66.1,653.4505,9.8737,189,189,318,323,0.722895,9.0
73,77,PK RAMALINGA RAJU,77.2637,1676800.0,306.0101,1691949.0,121.7312,656.1481,8.1562,167,167,236,243,0.705749,10.0


In [30]:
# Save to Excel file
final_results.to_excel("farmer_performance_ranking.xlsx", index=False)