In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Plotting defaults. The matplotlib style is applied first so that the seaborn
# palette set on the next line is not overwritten by the style reset.
plt.style.use('default')
sns.set_palette("husl")

# pandas rendering limits: no cap on displayed columns, at most 100 rows.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)


In [2]:
# Locate the source workbook. The path is relative, so it is resolved against
# the kernel's current working directory.
excel_file_path = Path('../data/raw/Bellary.xlsx')
print(f"Loading Excel file: {excel_file_path}")

file_exists = excel_file_path.exists()
print(f"File exists: {file_exists}")
if not file_exists:
    # Fail fast with an actionable message instead of letting .stat() raise a
    # bare traceback right after "File exists: False" has been printed.
    raise FileNotFoundError(
        f"Excel file not found: {excel_file_path.resolve()} "
        f"(current working directory: {Path.cwd()})"
    )

# Size is reported in MiB (bytes / 1024**2).
print(f"File size: {excel_file_path.stat().st_size / (1024*1024):.2f} MB")


Loading Excel file: ../data/raw/Bellary.xlsx
File exists: True
File size: 0.44 MB


In [3]:
# Open the workbook once; `xls` is kept at notebook scope because the
# following cells parse individual sheets from this same handle.
xls = pd.ExcelFile(excel_file_path)
sheet_names = xls.sheet_names

# Summarise the workbook: sheet count, then a 1-based numbered listing.
print(f"Number of sheets: {len(sheet_names)}")
print("Sheet names:")
for i, sheet in enumerate(sheet_names, start=1):
    print(f"  {i}. {sheet}")


Number of sheets: 5
Sheet names:
  1. Kampli
  2. Kampli 2
  3. Siruguppa
  4. Bellary 1
  5. Bellary 2


In [4]:
# Record each sheet's column labels (sheet name -> list of labels) and report
# the sheet's dimensions.
sheet_columns = {}
for sheet_name in sheet_names:
    df = pd.read_excel(xls, sheet_name=sheet_name)
    sheet_columns[sheet_name] = list(df.columns)
    print(f"\n{sheet_name} ({df.shape[0]} rows, {df.shape[1]} columns):")

# Perform the actual check: do all sheets expose the same columns in the same
# order? Tuples are used so the layouts can be de-duplicated in a set.
column_layouts = {tuple(columns) for columns in sheet_columns.values()}
if len(column_layouts) <= 1:
    print(f"\nAll {len(sheet_columns)} sheets share the same column layout.")
else:
    # Report rather than raise: differing layouts may be legitimate, but the
    # reader needs to know before combining sheets.
    print(f"\nColumn layouts differ across sheets ({len(column_layouts)} distinct layouts):")
    for sheet_name, columns in sheet_columns.items():
        print(f"  {sheet_name}: {columns}")



Kampli (1142 rows, 13 columns):

Kampli 2 (1338 rows, 13 columns):

Siruguppa (678 rows, 13 columns):

Bellary 1 (834 rows, 13 columns):

Bellary 2 (496 rows, 13 columns):


In [12]:
# Preview every sheet to understand the data structure: dimensions, column
# names, and the leading rows.
# The printed "First N rows" label and the number of rows actually displayed
# are both driven by PREVIEW_ROWS so they cannot drift apart.
PREVIEW_ROWS = 10

for sheet_name in sheet_names:
    # Banner separating one sheet's report from the next.
    print(f"\n{'='*60}")
    print(f"SHEET: {sheet_name}")
    print(f"{'='*60}")

    df = pd.read_excel(xls, sheet_name=sheet_name)

    print(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns")
    print("\nColumn names:")
    for i, col in enumerate(df.columns, 1):
        print(f"  {i:2d}. {col}")

    print(f"\nFirst {PREVIEW_ROWS} rows:")
    # `display` is the rich-output helper provided by the IPython/Jupyter kernel.
    display(df.head(PREVIEW_ROWS))



SHEET: Kampli
Shape: 1142 rows × 13 columns

Column names:
   1. S.no
   2. Status
   3. Farmer Code
   4. Farmer Name
   5. Govt id num
   6. Primary Contact
   7. Farmer Created Date
   8. Plot Created Date
   9. Sync Date
  10. Updation Date
  11. Area
  12. GPS Area
  13. Village

First 3 rows:


Unnamed: 0,S.no,Status,Farmer Code,Farmer Name,Govt id num,Primary Contact,Farmer Created Date,Plot Created Date,Sync Date,Updation Date,Area,GPS Area,Village
0,1.0,Old,KA0105030050,P UDDANAPPA,391699283337.0,9972510000.0,2024-02-01,2024-02-01 00:00:00,2024-02-01 20:42:43,2024-02-01 20:44:04,1.13,0.95,OLD DAROJI
1,,verified,,,,,NaT,,NaT,NaT,,,
2,2.0,Old,KA0105030049,P BASAVARAJA,672683528309.0,9900217000.0,2024-02-01,"01 Feb 2024 ,01 Feb 2024",2024-02-01 20:41:40,2024-02-01 20:43:06,1.88,1.8,OLD DAROJI
3,,verified,,,,,NaT,,NaT,NaT,,,
4,3.0,Old,KA0105030048,ARJUNA K,409069838229.0,7353126000.0,2024-01-31,"31 Jan 2024 ,31 Jan 2024 ,31 Jan 2024 ,31 Jan ...",2024-01-31 20:17:37,2024-01-31 20:19:36,4.75,4.9,OLD DAROJI
5,,verified,,,,,NaT,,NaT,NaT,,,
6,4.0,Old,KA0105030047,V AMBANNA,745586238369.0,8904448000.0,2024-01-31,"31 Jan 2024 ,31 Jan 2024 ,31 Jan 2024",2024-01-31 20:09:31,2024-01-31 20:16:02,3.2,3.09,OLD DAROJI
7,,verified,,,,,NaT,,NaT,NaT,,,
8,5.0,New,KA0105030046,K M RENUKAMMA,862598824330.0,8147873000.0,2024-01-30,2024-01-30 00:00:00,2024-01-30 19:20:47,2024-01-30 19:21:07,1.04,1.05,OLD DAROJI
9,,verified,,,,,NaT,,NaT,NaT,,,



SHEET: Kampli 2
Shape: 1338 rows × 13 columns

Column names:
   1. S.no
   2. Status
   3. Farmer Code
   4. Farmer Name
   5. Govt id num
   6. Primary Contact
   7. Farmer Created Date
   8. Plot Created Date
   9. Sync Date
  10. Updation Date
  11. Area
  12. GPS Area
  13. Village

First 3 rows:


Unnamed: 0,S.no,Status,Farmer Code,Farmer Name,Govt id num,Primary Contact,Farmer Created Date,Plot Created Date,Sync Date,Updation Date,Area,GPS Area,Village
0,1.0,New,KA0101250062,Mabu Bee,763078300000.0,9964172000.0,2024-05-22,,2024-05-23 07:18:23,NaT,12.6,0.0,OLD NELLUDI
1,,pending,,,,,NaT,,NaT,NaT,,,
2,2.0,New,KA0101250061,Honnura Basha S,655057500000.0,9964172000.0,2024-05-22,,2024-05-23 07:17:40,NaT,3.22,0.0,OLD NELLUDI
3,,pending,,,,,NaT,,NaT,NaT,,,
4,3.0,New,KA0101250060,Kolli Venkateshwara Rao,793450800000.0,7204279000.0,2024-05-10,,2024-05-11 06:14:58,NaT,4.05,0.0,OLD NELLUDI
5,,pending,,,,,NaT,,NaT,NaT,,,
6,4.0,New,KA0101250059,C.Pakeerappa,247720600000.0,9480952000.0,2024-04-08,2024-10-03 00:00:00,2024-04-08 19:16:47,NaT,5.55,5.33,OLD NELLUDI
7,,verified,,,,,NaT,,NaT,NaT,,,
8,5.0,New,KA0101250058,Honnurappa,780827700000.0,9964916000.0,2024-04-08,,2024-04-08 19:15:54,NaT,2.75,0.0,OLD NELLUDI
9,,pending,,,,,NaT,,NaT,NaT,,,



SHEET: Siruguppa
Shape: 678 rows × 13 columns

Column names:
   1. S.no
   2. Status
   3. Farmer Code
   4. Farmer Name
   5. Govt id num
   6. Primary Contact
   7. Farmer Created Date
   8. Plot Created Date
   9. Sync Date
  10. Updation Date
  11. Area
  12. GPS Area
  13. Village

First 3 rows:


Unnamed: 0,S.no,Status,Farmer Code,Farmer Name,Govt id num,Primary Contact,Farmer Created Date,Plot Created Date,Sync Date,Updation Date,Area,GPS Area,Village
0,1.0,New,KA0102030043,BHADRAGOUDA,290803714998,9743886000.0,2024-07-24,"24 Jul 2024 ,24 Jul 2024",2024-07-24 12:45:32,2024-07-24 12:58:31,4.91,4.86,ARLIGANURU
1,,pending,,,,,NaT,,NaT,NaT,,,
2,2.0,New,KA0102140012,N. RAJA RAO,UUB1230614,9480064000.0,2024-07-15,"15 Jul 2024 ,15 Jul 2024 ,15 Jul 2024",2024-07-15 17:02:46,2024-07-15 17:04:18,11.9,12.48,SIRUGUPPA
3,,pending,,,,,NaT,,NaT,NaT,,,
4,3.0,Old,KA0102050021,not,245926958790,,2024-04-18,"18 Apr 2024 ,18 Apr 2024",2024-04-18 16:33:07,NaT,,2.1,BAGEWADI
5,,pending,,,,,NaT,,NaT,NaT,,,
6,4.0,Old,KA0102050020,not,671328790400,,2024-04-18,2024-04-18 00:00:00,2024-04-18 16:32:29,NaT,,2.22,BAGEWADI
7,,pending,,,,,NaT,,NaT,NaT,,,
8,5.0,Old,KA0102050019,AMARESH,990311695550,7259550000.0,2024-04-03,"03 Apr 2024 ,03 Apr 2024",2024-04-03 15:18:40,2024-04-03 15:19:37,7.03,7.03,BAGEWADI
9,,pending,,,,,NaT,,NaT,NaT,,,



SHEET: Bellary 1
Shape: 834 rows × 13 columns

Column names:
   1. S.no
   2. Status
   3. Farmer Code
   4. Farmer Name
   5. Govt id num
   6. Primary Contact
   7. Farmer Created Date
   8. Plot Created Date
   9. Sync Date
  10. Updation Date
  11. Area
  12. GPS Area
  13. Village

First 3 rows:


Unnamed: 0,S.no,Status,Farmer Code,Farmer Name,Govt id num,Primary Contact,Farmer Created Date,Plot Created Date,Sync Date,Updation Date,Area,GPS Area,Village
0,1.0,New,KA0104130011,NINGAMMA,793775124217.0,8431451000.0,2024-04-08,,2024-04-08 17:51:48,NaT,0.5,0.0,MOKA
1,,pending,,,,,NaT,,NaT,NaT,,,
2,2.0,New,KA0104130010,VEERUPAKSHI,517508811362.0,8073122000.0,2024-03-28,,2024-03-28 16:24:17,NaT,7.0,0.0,MOKA
3,,pending,,,,,NaT,,NaT,NaT,,,
4,3.0,New,KA0104130009,KOLLI THIMMAPP,575829878864.0,9731432000.0,2024-03-23,,2024-03-27 10:59:53,NaT,7.14,0.0,MOKA
5,,pending,,,,,NaT,,NaT,NaT,,,
6,4.0,New,KA0104130008,PAVADAPPA,519781054698.0,7829675000.0,2024-03-23,"23 Mar 2024 ,23 Mar 2024",2024-03-23 18:28:32,2024-03-28 16:34:46,2.71,3.04,MOKA
7,,pending,,,,,NaT,,NaT,NaT,,,
8,5.0,New,KA0104130007,not,517500811362.0,,2024-03-23,"23 Mar 2024 ,23 Mar 2024 ,23 Mar 2024",2024-03-23 18:18:12,NaT,,6.11,MOKA
9,,pending,,,,,NaT,,NaT,NaT,,,



SHEET: Bellary 2
Shape: 496 rows × 13 columns

Column names:
   1. S.no
   2. Status
   3. Farmer Code
   4. Farmer Name
   5. Govt id num
   6. Primary Contact
   7. Farmer Created Date
   8. Plot Created Date
   9. Sync Date
  10. Updation Date
  11. Area
  12. GPS Area
  13. Village

First 3 rows:


Unnamed: 0,S.no,Status,Farmer Code,Farmer Name,Govt id num,Primary Contact,Farmer Created Date,Plot Created Date,Sync Date,Updation Date,Area,GPS Area,Village
0,1.0,Old,KA0104100086,MALLAPPA,592362633428.0,9535392759.0,2023-06-08 00:00:00,2023-06-08 00:00:00,2023-06-08 18:16:00,,1.17,1.07,KALLUKUTIGANAHALU
1,,verified,,,,,,,,,,,
2,2.0,Old,KA0104100085,Jambanna,327542372391.0,9611273413.0,2023-06-08 00:00:00,2023-06-08 00:00:00,2023-06-08 17:57:44,,1.36,1.05,KALLUKUTIGANAHALU
3,,verified,,,,,,,,,,,
4,3.0,Old,KA0104100084,N Parameshappa,605694793190.0,8971083113.0,2023-06-05 00:00:00,"05 Jun 2023 ,05 Jun 2023 ,07 Jun 2023",2023-06-08 17:25:48,,1.06,0.97,KALLUKUTIGANAHALU
5,,verified,,,,,,,,,,,
6,4.0,Old,KA0104100083,R RAGHAVENDRA,235270286778.0,9945141204.0,2023-06-05 00:00:00,2023-06-05 00:00:00,2023-06-05 18:25:35,,1.41,1.26,KALLUKUTIGANAHALU
7,,verified,,,,,,,,,,,
8,5.0,Old,KA0104100082,G GANGADHARA GOUDA,494463514876.0,9449233914.0,2023-06-02 00:00:00,"02 Jun 2023 ,02 Jun 2023",2023-06-02 14:03:25,,1.5,1.19,KALLUKUTIGANAHALU
9,,verified,,,,,,,,,,,
