In [1]:
import pandas as pd

In [2]:
# Read the source workbook into the working DataFrame
# (the path is relative to the notebook's working directory).
df = pd.read_excel(io='data/New.xlsx')

In [3]:
# Discard the columns that are not used further down; the result is rebound
# to `df` rather than mutating the frame in place.
unused_columns = ['Unnamed: 0', 'Unnamed: 1', 'Variety', 'Grade']
df = df.drop(columns=unused_columns)

In [4]:
# Split the 'Date' timestamp into separate calendar-part columns.
# The parts are floats whenever 'Date' contains missing values (NaT -> NaN);
# those rows are removed by the dropna cell that follows.
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day

# Drop the raw timestamp once its parts are extracted. The feature matrix is
# later built from every column except the target and the non-categorical
# columns are passed straight through to StandardScaler, which expects purely
# numeric input. The recorded outputs (df.columns, X_test) already show no
# 'Date' column, so this restores the state the notebook was actually run in.
df = df.drop(columns='Date')

In [5]:
# Remove every row that has at least one missing value
# (the arguments spell out pandas' defaults explicitly).
df = df.dropna(axis=0, how='any')

In [26]:
# Shuffle all rows with a fixed seed so the order is reproducible, then
# renumber the index from 0 and discard the old labels.
df = (
    df
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
# Preview the first rows of the shuffled frame.
df.head()

Unnamed: 0,District Name,Market Name,Commodity,Min Price(Rs.),Max Price(Rs.),Model Price,Year,Month,Day
0,Junagarh,Junagadh,Soyabean,2750.0,3500.0,3125.0,2018.0,6.0,13.0
1,Surat,S.Mandvi,Green Gram (Moong)(Whole),5750.0,6710.0,6230.0,2020.0,6.0,14.0
2,Bhavnagar,Bhavnagar,Wheat,2345.0,2365.0,2355.0,2022.0,7.0,25.0
3,Rajkot,Dhoraji,Wheat,2405.0,2590.0,2570.0,2024.0,6.0,4.0
4,Amreli,Amreli,Wheat,2250.0,3090.0,2905.0,2023.0,2.0,4.0


In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler,OrdinalEncoder
from sklearn.linear_model import LinearRegression

In [62]:
# Target: the 'Model Price' column. Features: every other column of `df`.
y = df['Model Price']
X = df.drop(columns='Model Price')

In [28]:
# List the column labels of the working frame.
df.columns

Index(['District Name', 'Market Name', 'Commodity', 'Min Price(Rs.)',
       'Max Price(Rs.)', 'Model Price', 'Year', 'Month', 'Day'],
      dtype='object')

In [10]:
# Encode the three categorical columns as integer codes and pass every other
# column through unchanged.
#
# handle_unknown='use_encoded_value' makes a fitted encoder map a category it
# did not see during fit to -1 instead of raising at predict time. This
# matters here because at least one market ('Khambhat') has a single row, so
# it can fall entirely into the test split.
# NOTE(review): ordinal codes impose an arbitrary order on nominal names;
# consider OneHotEncoder(handle_unknown='ignore') if the linear model should
# not see that order.
unknown_category_policy = {
    'handle_unknown': 'use_encoded_value',
    'unknown_value': -1,
}

preprocessor = ColumnTransformer(
    transformers=[
        ('district_name', OrdinalEncoder(**unknown_category_policy), ['District Name']),
        ('market_name', OrdinalEncoder(**unknown_category_policy), ['Market Name']),
        ('commodity', OrdinalEncoder(**unknown_category_policy), ['Commodity']),
    ],
    remainder='passthrough',
)

In [11]:
# Full model: encode the categorical columns, standardise every resulting
# column, then fit a linear regression on the scaled features.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

In [21]:

# Preview the first five rows of the working frame.
# NOTE(review): the recorded output has execution count 21, earlier than the
# shuffle cell (26), so it shows the unshuffled order.
df.head(n=5)

Unnamed: 0,District Name,Market Name,Commodity,Min Price(Rs.),Max Price(Rs.),Model Price,Year,Month,Day
0,Banaskanth,Amirgadh,Wheat,1565.0,1565.0,1565.0,2018.0,2.0,2.0
1,Banaskanth,Amirgadh,Wheat,1575.0,1705.0,1640.0,2018.0,6.0,11.0
2,Banaskanth,Amirgadh,Wheat,1650.0,1750.0,1700.0,2018.0,7.0,7.0
3,Banaskanth,Amirgadh,Wheat,1655.0,1720.0,1685.0,2018.0,7.0,5.0
4,Banaskanth,Amirgadh,Wheat,1800.0,1900.0,1850.0,2018.0,8.0,2.0


In [56]:
# Count the missing values per column; every count should be 0 after dropna.
df.isnull().sum(axis=0)

District Name     0
Market Name       0
Commodity         0
Min Price(Rs.)    0
Max Price(Rs.)    0
Model Price       0
Year              0
Month             0
Day               0
dtype: int64

In [63]:

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [64]:
# Fit the encoders, the scaler and the regressor on the training split only.
pipeline.fit(X=X_train, y=y_train)

In [65]:
# Predict 'Model Price' for the held-out rows with the fitted pipeline.
pipeline.predict(X=X_test)

array([1994.26844759, 7503.96483598, 1791.6600168 , ..., 2279.69719891,
       1882.27805457, 2421.63182309])

In [33]:
# Look for rows whose district is 'Khambhat' (the recorded result is empty).
in_khambhat_district = df['District Name'] == 'Khambhat'
df[in_khambhat_district]

Unnamed: 0,District Name,Market Name,Commodity,Min Price(Rs.),Max Price(Rs.),Model Price,Year,Month,Day


In [35]:
# Count the rows whose district is 'Khambhat' by summing the boolean mask.
khambhat_district_mask = df['District Name'] == 'Khambhat'
count = khambhat_district_mask.sum()

In [36]:
# Show the number of 'Khambhat' district rows computed in the previous cell.
count

0

In [37]:
# Display the held-out feature rows (pandas elides the middle of a long frame).
X_test

Unnamed: 0,District Name,Market Name,Commodity,Min Price(Rs.),Max Price(Rs.),Year,Month,Day
154623,Bhavnagar,Mahuva(Station Road),Groundnut,4910.0,6565.0,2021.0,6.0,11.0
139155,Jamnagar,Dhrol,Green Gram (Moong)(Whole),4850.0,7005.0,2019.0,10.0,19.0
52039,Rajkot,Rajkot,Wheat,1765.0,2030.0,2021.0,7.0,22.0
100616,Dahod,Dahod,Groundnut,5000.0,5800.0,2022.0,1.0,4.0
78604,Junagarh,Visavadar,Soyabean,4810.0,5380.0,2022.0,11.0,3.0
...,...,...,...,...,...,...,...,...
13968,Porbandar,Porbandar,Wheat,1665.0,1705.0,2019.0,5.0,8.0
190546,Bhavnagar,Taleja,Green Gram (Moong)(Whole),6505.0,6845.0,2022.0,5.0,7.0
14351,Patan,Siddhpur,Wheat,2030.0,2225.0,2019.0,10.0,18.0
95579,Rajkot,Jasdan,Soyabean,5000.0,5450.0,2023.0,1.0,7.0


In [38]:
# Print each distinct district name present in the working frame.
district_names = df['District Name'].unique()
print(district_names)

['Junagarh' 'Surat' 'Bhavnagar' 'Rajkot' 'Amreli' 'Banaskanth'
 'Sabarkantha' 'Dahod' 'Jamnagar' 'Kheda' 'Mehsana' 'Chhota Udaipur'
 'Patan' 'Morbi' 'Botad' 'Gandhinagar' 'Bharuch' 'Anand'
 'Vadodara(Baroda)' 'Porbandar' 'Ahmedabad' 'Gir Somnath'
 'Devbhumi Dwarka' 'Surendranagar' 'Panchmahals' 'Kachchh' 'Narmada']


In [43]:
# 'Khambhat' is a market name rather than a district: show its rows.
in_khambhat_market = df['Market Name'] == 'Khambhat'
df[in_khambhat_market]

Unnamed: 0,District Name,Market Name,Commodity,Min Price(Rs.),Max Price(Rs.),Model Price,Year,Month,Day
161614,Anand,Khambhat,Cotton,3300.0,3400.0,3350.0,2018.0,9.0,18.0


In [60]:
# Keep only the rows whose market is not 'Lalpur'.
# NOTE(review): in file order this cell comes after X / y are built from `df`,
# so on a top-to-bottom run it does not affect the training data; its
# execution count (60) shows it originally ran before X / y (62) — confirm
# the intended cell order.
not_lalpur_market = df['Market Name'] != 'Lalpur'
df = df[not_lalpur_market]

In [61]:
# Keep only the rows whose district is not 'Lalpur'.
# NOTE(review): in file order this cell comes after X / y are built from `df`,
# so on a top-to-bottom run it does not affect the training data; its
# execution count (61) shows it originally ran before X / y (62) — confirm
# the intended cell order.
not_lalpur_district = df['District Name'] != 'Lalpur'
df = df[not_lalpur_district]

In [66]:
# Preview the first five training rows (features only, original row labels kept).
X_train.head(n=5)

Unnamed: 0,District Name,Market Name,Commodity,Min Price(Rs.),Max Price(Rs.),Year,Month,Day
144064,Sabarkantha,Talod,Wheat,2600.0,3055.0,2024.0,1.0,17.0
84555,Rajkot,Rajkot,Groundnut,5700.0,6390.0,2024.0,7.0,4.0
42603,Bhavnagar,Taleja,Groundnut,5000.0,6250.0,2024.0,4.0,1.0
135183,Amreli,Rajula,Soyabean,5105.0,5150.0,2022.0,12.0,16.0
64125,Bhavnagar,Mahuva(Station Road),Groundnut,4520.0,4520.0,2021.0,9.0,27.0


In [68]:
# Take the first training row by position; the result is a Series whose
# index holds the feature names.
X_train.iloc[0]

District Name     Sabarkantha
Market Name             Talod
Commodity               Wheat
Min Price(Rs.)         2600.0
Max Price(Rs.)         3055.0
Year                   2024.0
Month                     1.0
Day                      17.0
Name: 144064, dtype: object

In [78]:
# Wrap the first training row (a Series) in a DataFrame. The Series becomes a
# single column, so the features end up as rows, not as the one-row layout
# that pipeline.predict needs.
pd.DataFrame(X_train.iloc[0])

Unnamed: 0,144064
District Name,Sabarkantha
Market Name,Talod
Commodity,Wheat
Min Price(Rs.),2600.0
Max Price(Rs.),3055.0
Year,2024.0
Month,1.0
Day,17.0


In [79]:
# Take the first training row as a Series ...
first_row = X_train.iloc[0]
# ... and place it in a list so the DataFrame constructor treats it as one
# record (one row) laid out under the training columns.
# NOTE(review): `first_row` is reassigned by the next cell and
# `reshaped_first_row` is not used in the visible part of the notebook.
reshaped_first_row = pd.DataFrame([first_row], columns=X_train.columns)

In [86]:
# Build a one-row DataFrame from the first training row, suitable as input to
# pipeline.predict. Selecting with a list of positions keeps the result a
# DataFrame and preserves each column's dtype, instead of rebuilding the row
# from an object-dtype `.values` array and relying on pandas to re-infer the
# dtypes. The index is reset so the single row is labelled 0, as before.
first_row = X_train.iloc[[0]].reset_index(drop=True)

In [87]:
# Predict the price for the single-row frame; the result is a one-element array.
pipeline.predict(X=first_row)

array([2860.2819386])

In [88]:
# Display the one-row frame that was passed to pipeline.predict.
first_row

Unnamed: 0,District Name,Market Name,Commodity,Min Price(Rs.),Max Price(Rs.),Year,Month,Day
0,Sabarkantha,Talod,Wheat,2600.0,3055.0,2024.0,1.0,17.0


In [89]:
# Raw values of the first training row; strings and numbers share one array,
# so its dtype is object.
X_train.iloc[0].values

array(['Sabarkantha', 'Talod', 'Wheat', 2600.0, 3055.0, 2024.0, 1.0, 17.0],
      dtype=object)

In [90]:
 # Show the first training row again as a Series (same expression as the earlier cell).
 X_train.iloc[0]

District Name     Sabarkantha
Market Name             Talod
Commodity               Wheat
Min Price(Rs.)         2600.0
Max Price(Rs.)         3055.0
Year                   2024.0
Month                     1.0
Day                      17.0
Name: 144064, dtype: object