In [None]:
pip install transformers torch pandas scikit-learn


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer

# Read the raw corpus; the code below relies on it having 'text' and 'label' columns.
# NOTE(review): absolute path, presumably a Colab upload location - adjust when running elsewhere.
data = pd.read_csv('/content/corpus2 (1).csv')

# Replace the label column with integer class ids; the fitted encoder is kept
# so that predictions can be mapped back to the original label values later.
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

# Tokenizer matching the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Both splits are tokenized with the same options (padding on, truncation at 128 tokens).
encode_options = dict(truncation=True, padding=True, max_length=128)
train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

class CropDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from feature name (for example ``input_ids``) to a
            per-example indexable collection; ``encodings[key][idx]`` must be
            convertible with ``torch.tensor``.
        labels: Per-example labels. May be any object exposing ``.tolist()``
            (such as a pandas Series or NumPy array) or any plain iterable
            such as a list or tuple.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        # Normalize to a plain list so __getitem__ always indexes by position.
        # Objects without .tolist() (e.g. an ordinary list) are accepted too,
        # so the dataset can be rebuilt from labels that were already converted.
        self.labels = labels.tolist() if hasattr(labels, 'tolist') else list(labels)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, with the label under ``'labels'``."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples, taken from the number of labels."""
        return len(self.labels)

# Wrap the tokenized splits and their labels for use by the Trainer
train_dataset = CropDataset(train_encodings, train_labels)
test_dataset = CropDataset(test_encodings, test_labels)

# Pre-trained BERT classifier with one output class per label known to the encoder
num_crop_labels = len(label_encoder.classes_)
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=num_crop_labels,
)

# Training configuration: output locations, schedule, optimizer settings, logging/evaluation
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    num_train_epochs=4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=10,
    evaluation_strategy="steps",
)

# The held-out test split doubles as the evaluation set during training
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=test_dataset,
    train_dataset=train_dataset,
)

# Fine-tune the model
trainer.train()

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Reload the classifier from a saved checkpoint directory, plus the matching tokenizer.
# NOTE(review): the checkpoint step number in the path depends on the training run - adjust as needed.
model = BertForSequenceClassification.from_pretrained('/content/results/checkpoint-880')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Switch the model to evaluation mode before predicting
model.eval()

# Turn the stored test encodings into tensors covering the whole test split at once
inputs = {
    field: torch.tensor(test_dataset.encodings[field])
    for field in ('input_ids', 'attention_mask')
}
# Reuse the label list held by the dataset so it lines up with the encodings
test_labels = test_dataset.labels

# Single forward pass without gradient tracking
with torch.no_grad():
    outputs = model(**inputs)

# Highest-scoring class index per example, as a NumPy array
predicted_ids = torch.argmax(outputs.logits, dim=-1)
preds = predicted_ids.cpu().numpy()

In [None]:
# Accuracy of the predictions against the true test labels
accuracy = accuracy_score(test_labels, preds)

# Precision, recall and F1 with 'weighted' averaging across classes;
# the fourth returned element is not used.
weighted_scores = precision_recall_fscore_support(test_labels, preds, average='weighted')
precision, recall, f1, _ = weighted_scores

# Report every metric on its own line, rounded to four decimal places
print(
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)


In [None]:
# Function to predict crop type based on input features
def predict_crop(model, tokenizer, text):
    """Return the decoded label predicted for a single feature string.

    Args:
        model: Classifier that exposes ``.device`` and, when called with the
            tokenizer output as keyword arguments, returns an object with a
            ``.logits`` tensor.
        tokenizer: Callable that turns ``text`` into a mapping of PyTorch
            tensors (it is invoked with ``return_tensors="pt"``).
        text: Feature description to classify.

    Returns:
        The first element of ``label_encoder.inverse_transform`` applied to
        the highest-scoring class index. ``label_encoder`` is read from module
        scope.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    # Pure inference: disable gradient tracking for the forward pass
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=1)
    return label_encoder.inverse_transform(prediction.cpu().numpy())[0]

# Function to get feature values one by one from the user
def get_features():
    """Prompt for each feature in turn and return them as one text string.

    Every feature is requested repeatedly until the reply parses as a float.
    Each accepted value becomes a fragment of the form `` name:value,`` and
    the fragments are joined with single spaces.

    Returns:
        str: The joined fragments, in the order N, P, K, temperature,
        humidity, rainfall.
    """
    print("Please enter the following features one by one:")

    fragments = []
    for feature in ("N", "P", "K", "temperature", "humidity", "rainfall"):
        value = None
        # Keep asking for this feature until a numeric reply is received
        while value is None:
            try:
                value = float(input(f"Enter value for {feature}: "))
            except ValueError:
                print(f"Invalid input for {feature}. Please enter a numeric value.")
        fragments.append(f" {feature}:{value},")

    # Fragments are already strings, so they can be joined directly
    return " ".join(fragments)

# Chatbot interface
def chatbot():
    """Run an interactive console loop that recommends a crop from typed-in features.

    Commands are matched case-insensitively and with surrounding whitespace
    ignored:

    * ``start`` - collect feature values via ``get_features`` and print the
      label returned by ``predict_crop``.
    * ``exit`` / ``quit`` - print a farewell and return.

    Any other input prints a hint and prompts again. The module-level
    ``model`` and ``tokenizer`` are passed to ``predict_crop``.
    """
    print("Welcome to the Crop Recommendation Chatbot!")
    print("You will be asked to input various feature values to get a crop recommendation.")

    while True:
        # strip() so that stray spaces around a command do not make it "invalid"
        text = input("Type 'start' to enter features, or 'exit' to quit: ").strip().lower()

        if text in ('exit', 'quit'):
            print("Goodbye!")
            break
        elif text == 'start':
            # Get features from user
            features = get_features()

            # Predict crop based on the features
            crop = predict_crop(model, tokenizer, features)
            print(f"Recommended crop: {crop}")
        else:
            print("Invalid command. Please type 'start' to begin or 'exit' to quit.")

# Start the interactive session; chatbot() keeps prompting until the user types 'exit' or 'quit'
chatbot()
