In [7]:
import pandas as pd

# Load the dataset
df = pd.read_csv('agriculture.csv')

# Display the first few rows of the dataset
print("Original Dataset:")
print(df.head())

# Check for missing values
print("\nMissing Values:")
print(df.isnull().sum())

# Drop rows with missing values
df.dropna(inplace=True)

# Check for duplicate rows
print("\nDuplicate Rows:")
print(df.duplicated().sum())

# Drop duplicate rows
df.drop_duplicates(inplace=True)

# Convert 'arrival_date' to datetime format
df['arrival_date'] = pd.to_datetime(df['arrival_date'])

# Display the cleaned dataset
print("\nCleaned Dataset:")
print(df.head())

# Save the cleaned dataset
df.to_csv('cleaned_dataset.csv', index=False)


Original Dataset:
                 state       district      market              commodity  \
0  Andaman and Nicobar  South Andaman  Port Blair             Amaranthus   
1  Andaman and Nicobar  South Andaman  Port Blair         Banana - Green   
2  Andaman and Nicobar  South Andaman  Port Blair  Bhindi(Ladies Finger)   
3  Andaman and Nicobar  South Andaman  Port Blair           Bitter gourd   
4  Andaman and Nicobar  South Andaman  Port Blair           Black pepper   

          variety arrival_date  min_price  max_price  modal_price  
0      Amaranthus   04/03/2019       6000       8000         7000  
1  Banana - Green   04/03/2019       4500       5500         5000  
2          Bhindi   04/03/2019       6000       8000         7000  
3           Other   04/03/2019       6000       8000         7000  
4           Other   04/03/2019     110000     130000       120000  

Missing Values:
state           0
district        0
market          0
commodity       0
variety         0
arrival_da

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the cleaned dataset
df = pd.read_csv('cleaned_dataset.csv')

# Feature Engineering: Encoding categorical variables
label_encoders = {}
categorical_columns = ['state', 'district', 'market', 'commodity', 'variety']
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Splitting Data into training and testing sets
X = df.drop(columns=['modal_price', 'arrival_date'])  # Features
y = df['modal_price']  # Target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Selection: Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Training the model
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Prediction example
sample_data = X_test.iloc[0].values.reshape(1, -1)
predicted_price = model.predict(sample_data)
print("Predicted Price:", predicted_price)


Mean Squared Error: 78467.27333794646
Predicted Price: [1923.14]




In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

# Load the cleaned dataset
df = pd.read_csv('cleaned_dataset.csv')

# Feature Engineering: Encoding categorical variables
label_encoders = {}
categorical_columns = ['state', 'district', 'market', 'commodity', 'variety']
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Splitting Data into training and testing sets
X = df.drop(columns=['modal_price', 'arrival_date'])  # Features
y = df['modal_price']  # Target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Selection: Gradient Boosting Regressor
model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Training the model
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print("R-squared (R2) Score:", r2)

# Prediction example
sample_data = X_test.iloc[0].values.reshape(1, -1)
predicted_price = model.predict(sample_data)
print("Predicted Price:", predicted_price)


R-squared (R2) Score: 0.9937002047268667
Predicted Price: [1854.52448818]




In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the cleaned dataset
df = pd.read_csv('cleaned_dataset.csv')

# Feature Engineering: Encoding categorical variables
label_encoders = {}
categorical_columns = ['state', 'district', 'market', 'commodity', 'variety']
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Splitting Data into training and testing sets
X = df.drop(columns=['modal_price', 'arrival_date'])  # Features
y = df['modal_price']  # Target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Dictionary to hold model instances
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet Regression": ElasticNet(),
    "SVR": SVR(),
    "KNN Regression": KNeighborsRegressor(),
    "Decision Tree Regression": DecisionTreeRegressor()
}

# Training and Evaluation
for name, model in models.items():
    print("Model:", name)
    # Training
    model.fit(X_train, y_train)
    # Prediction
    y_pred = model.predict(X_test)
    # Evaluation
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print("Mean Squared Error:", mse)
    print("R-squared (R2) Score:", r2)
    print("-------------------------------")


Model: Linear Regression
Mean Squared Error: 70432.77130403087
R-squared (R2) Score: 0.9905400725069639
-------------------------------
Model: Ridge Regression
Mean Squared Error: 70432.76855729558
R-squared (R2) Score: 0.9905400728758819
-------------------------------
Model: Lasso Regression
Mean Squared Error: 70429.15764286397
R-squared (R2) Score: 0.9905405578630274
-------------------------------
Model: ElasticNet Regression
Mean Squared Error: 70428.58108527347
R-squared (R2) Score: 0.9905406353013121
-------------------------------
Model: SVR
Mean Squared Error: 7172457.206567966
R-squared (R2) Score: 0.03665688762761299
-------------------------------
Model: KNN Regression
Mean Squared Error: 45797.822499999995
R-squared (R2) Score: 0.9938488281496295
-------------------------------
Model: Decision Tree Regression
Mean Squared Error: 80449.35714285714
R-squared (R2) Score: 0.9891947303599958
-------------------------------


In [1]:
import pandas as pd

# Load the dataset from a CSV file
try:
    df = pd.read_csv('cleaned_dataset.csv')
except FileNotFoundError:
    print("Dataset file not found!")
    exit()

# Function to get max and min prices of a commodity in a state
def get_price_range(state, commodity):
    # Filter data based on state and commodity
    filtered_data = df[(df['state'] == state) & (df['commodity'] == commodity)]
    
    if filtered_data.empty:
        return f"No data available for {commodity} in {state}."
    
    # Find max and min prices
    max_price = filtered_data['max_price'].max()
    min_price = filtered_data['min_price'].min()
    
    return f"Maximum price of {commodity} in {state}: {max_price}, Minimum price: {min_price}"

# Example usage
state = input("Enter state name: ")
commodity = input("Enter commodity: ")

print(get_price_range(state, commodity))


Enter state name: Andaman and Nicobar
Enter commodity: Banana - Green
Maximum price of Banana - Green in Andaman and Nicobar: 5500, Minimum price: 4500
