In [8]:
!pip install transformers datasets torch



In [9]:
import json
from datasets import Dataset

# Load your dataset
with open('summary1.json', 'r') as f:
    data = json.load(f)

# Prepare the data in the right format for fine-tuning
processed_data = []
for entry in data:
    # Assuming 'Rainfall', 'Wind Speed', 'Max Temp', 'Min Temp', and 'Morning Humidity' are your input features
    # and 'Advisory' is the output you're generating
    prompt = (f"Weather data: Rainfall: {entry['Rainfall']}, "
              f"Wind Speed: {entry['Wind Speed']}, "
              f"Max Temperature: {entry['Max Temp']}, "
              f"Min Temperature: {entry['Min Temp']}, "
              f"Morning Humidity: {entry['Morning Humidity']}% "
              f"Afternoon Humidity: {entry['Afternoon Humidity']}%\n\n")

    response = entry['Advisory']  # Output advisory message
    processed_data.append({'input': prompt, 'output': response})

# Convert to a Hugging Face Dataset
dataset = Dataset.from_list(processed_data)

# To check the first few entries
print(dataset[:5])  # Display first 5 rows for checking


{'input': ['Weather data: Rainfall: 29 mm, Wind Speed: Negligible, Max Temperature: 32.3Â°C, Min Temperature: 22.6Â°C, Morning Humidity: 94%% Afternoon Humidity: 78%%\n\n', 'Weather data: Rainfall: 70.4 mm, Wind Speed: Light, Max Temperature: 28.2Â°C, Min Temperature: 24Â°C, Morning Humidity: 87%% Afternoon Humidity: 77%%\n\n', 'Weather data: Rainfall: 40 mm, Wind Speed: 15 km/h, Max Temperature: 33.1Â°C, Min Temperature: 24.8Â°C, Morning Humidity: 92%% Afternoon Humidity: 72%%\n\n', 'Weather data: Rainfall: 5.5 mm, Wind Speed: 10 km/h, Max Temperature: 30.7Â°C, Min Temperature: 26.3Â°C, Morning Humidity: 88%% Afternoon Humidity: 65%%\n\n', 'Weather data: Rainfall: 22 mm, Wind Speed: Moderate, Max Temperature: 32.0Â°C, Min Temperature: 25.5Â°C, Morning Humidity: 85%% Afternoon Humidity: 72%%\n\n'], 'output': ['For Vellayani, moderate rainfall occurred during the week of 16th to 22nd November, with a significant downpour on 20th November. Expect calm conditions with no significant wind.

In [17]:
from transformers import GPTNeoForCausalLM, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import Dataset

# Load pre-trained GPT-Neo model and tokenizer
model_name = "EleutherAI/gpt-neo-1.3B"  # Example model name (GPT-Neo 1.3B)
model = GPTNeoForCausalLM.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['input'], truncation=True, padding="max_length", max_length=512)

# Apply tokenization
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type="torch", columns=["input_ids", "attention_mask", "output"])

# Prepare the input and labels (shifted for language modeling)
def create_labels(examples):
    input_ids = examples['input_ids']
    labels = input_ids.copy()
    labels[labels == tokenizer.pad_token_id] = -100  # Exclude padding from loss
    return {"labels": labels}

tokenized_datasets = tokenized_datasets.map(create_labels, batched=True)

RuntimeError: Failed to import transformers.models.gpt_neo.modeling_gpt_neo because of the following error (look up to see its traceback):
cannot import name 'CompileConfig' from 'transformers.generation' (C:\Users\arunp\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\generation\__init__.py)

In [16]:
training_args = TrainingArguments(
    output_dir="./weather_summary_model",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    save_steps=500,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    eval_dataset=tokenized_datasets,
)

# Fine-tune the model
trainer.train()

NameError: name 'TrainingArguments' is not defined

In [None]:
trainer.save_model("./weather_summary_model")
tokenizer.save_pretrained("./weather_summary_model")

In [None]:
def generate_advisory(weather_data):
    input_prompt = f"Weather data: Rainfall: {weather_data['rainfall']}mm, Wind Speed: {weather_data['wind_speed']}km/h, Temperature: {weather_data['temperature']}°C, Humidity: {weather_data['humidity']}%\n\n"
    inputs = tokenizer(input_prompt, return_tensors="pt", truncation=True, padding="max_length", max_length=512)
    
    # Generate prediction
    outputs = model.generate(**inputs, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2)
    advisory = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return advisory

# Example usage
new_weather_data = {"rainfall": 12, "wind_speed": 10, "temperature": 28, "humidity": 70}
advisory = generate_advisory(new_weather_data)
print(advisory)