<a href="https://colab.research.google.com/github/arunv8055/Soya-Market-Analysis/blob/main/Soya_Market_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Essential Libraries for Soya Analysis

In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Importing and Preparing the dataset

In [33]:
# Source CSV of soya price records, read straight from the project's GitHub repository.
url = 'https://raw.githubusercontent.com/arunv8055/Soya-Market-Analysis/main/Soya_August_Price.csv'
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows to sanity-check the load.
df.head()

Unnamed: 0,State Name,District Name,Market Name,Variety,Group,Arrivals (Tonnes),Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Reported Date
0,Andhra Pradesh,Kurnool,Kurnool,Soyabeen,Oil Seeds,0.1,2509,4610,2509,27-Jul-24
1,Chattisgarh,Balodabazar,Bhatapara,Other,Oil Seeds,0.4,4200,4281,4281,26-Sep-24
2,Chattisgarh,Kabirdham,Biranpur kalan (Sahaspur Lohra),Yellow,Oil Seeds,340.0,4050,4050,4050,2-Aug-24
3,Chattisgarh,Kabirdham,Biranpur kalan (Sahaspur Lohra),Yellow,Oil Seeds,354.0,4100,4100,4100,10-Sep-24
4,Chattisgarh,Kabirdham,Kawardha,Yellow,Oil Seeds,0.1,4200,4200,4200,29-Aug-24


In [34]:
# Convert the textual report dates (e.g. "27-Jul-24") into datetime64 values;
# the explicit day-abbreviated month-two digit year format avoids ambiguous parsing.
df['Reported Date'] = pd.to_datetime(
    df['Reported Date'],
    format='%d-%b-%y',
)


In [35]:
# Summarise column dtypes and non-null counts to verify the date conversion
# and spot missing values.
# NOTE(review): in the recorded output 'Arrivals (Tonnes)' is object dtype with
# 18124 of 18133 non-null values -- it probably needs numeric conversion/cleaning
# before any aggregation; confirm against later cells.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18133 entries, 0 to 18132
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   State Name                 18133 non-null  object        
 1   District Name              18133 non-null  object        
 2   Market Name                18133 non-null  object        
 3   Variety                    18133 non-null  object        
 4   Group                      18133 non-null  object        
 5   Arrivals (Tonnes)          18124 non-null  object        
 6   Min Price (Rs./Quintal)    18133 non-null  int64         
 7   Max Price (Rs./Quintal)    18133 non-null  int64         
 8   Modal Price (Rs./Quintal)  18133 non-null  int64         
 9   Reported Date              18133 non-null  datetime64[ns]
dtypes: datetime64[ns](1), int64(3), object(6)
memory usage: 1.4+ MB


In [36]:
# Preview the first rows again to check how 'Reported Date' looks after the
# datetime conversion.
df.head()

Unnamed: 0,State Name,District Name,Market Name,Variety,Group,Arrivals (Tonnes),Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Reported Date
0,Andhra Pradesh,Kurnool,Kurnool,Soyabeen,Oil Seeds,0.1,2509,4610,2509,2024-07-27
1,Chattisgarh,Balodabazar,Bhatapara,Other,Oil Seeds,0.4,4200,4281,4281,2024-09-26
2,Chattisgarh,Kabirdham,Biranpur kalan (Sahaspur Lohra),Yellow,Oil Seeds,340.0,4050,4050,4050,2024-08-02
3,Chattisgarh,Kabirdham,Biranpur kalan (Sahaspur Lohra),Yellow,Oil Seeds,354.0,4100,4100,4100,2024-09-10
4,Chattisgarh,Kabirdham,Kawardha,Yellow,Oil Seeds,0.1,4200,4200,4200,2024-08-29


In [37]:
from geopy.geocoders import ArcGIS

# ArcGIS geocoding client used to look up market coordinates, configured with a
# 10-second request timeout; raise it further if lookups time out.
nom = ArcGIS(timeout=10)

# Build one "State, District, Market" address string per row to use as the
# geocoding query. Vectorised string concatenation replaces the row-wise
# `df.apply(..., axis=1)`, which executed a Python lambda for every row;
# `astype(str)` mirrors the str() conversion the f-string applied to each value.
df["location"] = (
    df['State Name'].astype(str)
    + ", " + df['District Name'].astype(str)
    + ", " + df['Market Name'].astype(str)
)

In [38]:
# One row per distinct location string, so each market only has to be geocoded once.
# Chaining `drop_duplicates()` returns a fresh DataFrame instead of mutating a
# column selection of `df` in place -- the in-place form is the classic
# chained-assignment pattern that can raise SettingWithCopyWarning (hidden in
# this notebook by the global warnings filter).
# Row labels of the first occurrence of each location are preserved.
df_unique = df[['location']].drop_duplicates()

In [39]:
def _geocode_lat_lon(location):
    """Return ``(latitude, longitude)`` for an address string via the ArcGIS geocoder.

    Returns ``(nan, nan)`` when the geocoder finds no match (``geocode`` returns
    ``None`` in that case), so a single unresolved market does not abort the run
    with an AttributeError.
    """
    match = nom.geocode(location)
    if match is None:
        return (np.nan, np.nan)
    return (match.latitude, match.longitude)


# Geocode every unique location exactly once and reuse the result for both
# columns; the previous version issued two network requests per location
# (one for latitude, one for longitude).
_coords = [_geocode_lat_lon(loc) for loc in df_unique["location"]]
df_unique['latitude'] = [lat for lat, _lon in _coords]
df_unique['longitude'] = [lon for _lat, lon in _coords]

# Surface failed lookups explicitly instead of leaving unexplained NaNs.
_unresolved = df_unique.loc[df_unique['latitude'].isna(), 'location'].tolist()
if _unresolved:
    print(f"Could not geocode {len(_unresolved)} location(s): {_unresolved}")

In [44]:
# Display the unique locations together with their geocoded latitude/longitude
# (long frames are truncated in the rendered output).
df_unique

Unnamed: 0,location,latitude,longitude
0,"Andhra Pradesh, Kurnool, Kurnool",15.831204,78.036177
1,"Chattisgarh, Balodabazar, Bhatapara",21.732599,81.946098
2,"Chattisgarh, Kabirdham, Biranpur kalan (Sahasp...",21.834106,81.126526
4,"Chattisgarh, Kabirdham, Kawardha",22.092511,81.222406
7,"Chattisgarh, Rajnandgaon, Gandai",21.665701,81.100403
...,...,...,...
16854,"Uttrakhand, UdhamSinghNagar, Bazpur",29.158001,79.147598
17015,"Telangana, Warangal, Warangal",17.974507,79.611362
17305,"Madhya Pradesh, Khandwa, Badwah(F&V)",21.821173,76.346161
17852,"Maharashtra, Jalana, Jafrabad",20.192411,76.019254


In [45]:
# @title Combining the dataset
# Attach the geocoded coordinates to every price record: a left join on
# 'location' keeps all rows of df and adds latitude/longitude from df_unique.
df_merged = df.merge(df_unique, how='left', on='location')

# Preview the first rows of the combined frame.
df_merged.head()

Unnamed: 0,State Name,District Name,Market Name,Variety,Group,Arrivals (Tonnes),Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Reported Date,location,latitude,longitude
0,Andhra Pradesh,Kurnool,Kurnool,Soyabeen,Oil Seeds,0.1,2509,4610,2509,2024-07-27,"Andhra Pradesh, Kurnool, Kurnool",15.831204,78.036177
1,Chattisgarh,Balodabazar,Bhatapara,Other,Oil Seeds,0.4,4200,4281,4281,2024-09-26,"Chattisgarh, Balodabazar, Bhatapara",21.732599,81.946098
2,Chattisgarh,Kabirdham,Biranpur kalan (Sahaspur Lohra),Yellow,Oil Seeds,340.0,4050,4050,4050,2024-08-02,"Chattisgarh, Kabirdham, Biranpur kalan (Sahasp...",21.834106,81.126526
3,Chattisgarh,Kabirdham,Biranpur kalan (Sahaspur Lohra),Yellow,Oil Seeds,354.0,4100,4100,4100,2024-09-10,"Chattisgarh, Kabirdham, Biranpur kalan (Sahasp...",21.834106,81.126526
4,Chattisgarh,Kabirdham,Kawardha,Yellow,Oil Seeds,0.1,4200,4200,4200,2024-08-29,"Chattisgarh, Kabirdham, Kawardha",22.092511,81.222406


In [46]:
# Check the merged frame: row count should match the original data and the new
# location/latitude/longitude columns should be listed with their non-null counts.
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18133 entries, 0 to 18132
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   State Name                 18133 non-null  object        
 1   District Name              18133 non-null  object        
 2   Market Name                18133 non-null  object        
 3   Variety                    18133 non-null  object        
 4   Group                      18133 non-null  object        
 5   Arrivals (Tonnes)          18124 non-null  object        
 6   Min Price (Rs./Quintal)    18133 non-null  int64         
 7   Max Price (Rs./Quintal)    18133 non-null  int64         
 8   Modal Price (Rs./Quintal)  18133 non-null  int64         
 9   Reported Date              18133 non-null  datetime64[ns]
 10  location                   18133 non-null  object        
 11  latitude                   18133 non-null  float64       
 12  long