In [1]:
import os
import random
from datetime import datetime, timedelta

import requests

# OpenWeatherMap API key.
# Prefer supplying it through the OPENWEATHERMAP_API_KEY environment variable so
# the secret does not have to live in the notebook.
# NOTE(security): the literal fallback below is already stored here in plain
# text; it should be rotated and the fallback removed once the environment
# variable is configured everywhere this notebook runs.
api_key = os.environ.get('OPENWEATHERMAP_API_KEY') or '337586e7326dcb828d7a386379093040'

In [2]:
def get_weather(city):
    """Fetch the current weather for ``city`` from OpenWeatherMap.

    Args:
        city: City name, sent as the ``q`` query parameter.

    Returns:
        The decoded JSON payload when the API answers with HTTP 200,
        otherwise ``None`` (non-200 status, connection failure or timeout).
    """
    try:
        response = requests.get(
            # HTTPS so the API key is not transmitted in clear text.
            "https://api.openweathermap.org/data/2.5/weather",
            # Let requests build the query string so names containing spaces,
            # '&' or '#' are percent-encoded instead of corrupting the URL.
            params={"q": city, "appid": api_key, "units": "metric"},
            # Without a timeout a stalled connection would block indefinitely.
            timeout=10,
        )
    except requests.RequestException:
        # Transport-level problems are reported like any other failed lookup.
        return None

    if response.status_code != 200:
        return None
    return response.json()

def get_forecast(city):
    """Fetch the multi-day forecast for ``city`` from OpenWeatherMap.

    Args:
        city: City name, sent as the ``q`` query parameter.

    Returns:
        The decoded JSON payload when the API answers with HTTP 200,
        otherwise ``None`` (non-200 status, connection failure or timeout).
    """
    try:
        response = requests.get(
            # HTTPS so the API key is not transmitted in clear text.
            "https://api.openweathermap.org/data/2.5/forecast",
            # Let requests build the query string so names containing spaces,
            # '&' or '#' are percent-encoded instead of corrupting the URL.
            params={"q": city, "appid": api_key, "units": "metric"},
            # Without a timeout a stalled connection would block indefinitely.
            timeout=10,
        )
    except requests.RequestException:
        # Transport-level problems are reported like any other failed lookup.
        return None

    if response.status_code != 200:
        return None
    return response.json()

def generate_random_future_date():
    """Return a date 1 to 5 days (inclusive) after now, formatted as YYYY-MM-DD."""
    offset = timedelta(days=random.randint(1, 5))
    return (datetime.now() + offset).strftime('%Y-%m-%d')

In [3]:
def _resolve_forecast_date(date):
    """Map the relative labels 'today'/'tomorrow' to a YYYY-MM-DD string.

    Any other value is returned unchanged (the caller only passes strings that
    are already formatted as YYYY-MM-DD in that case).
    """
    day_offsets = {"today": 0, "tomorrow": 1}
    if date in day_offsets:
        # NOTE(review): this uses the local clock, like generate_random_future_date;
        # forecast 'dt_txt' values are presumably UTC - confirm if that matters.
        resolved = datetime.now() + timedelta(days=day_offsets[date])
        return resolved.strftime('%Y-%m-%d')
    return date


def generate_dataset(num_samples=20):
    """Build synthetic weather-assistant examples backed by live API responses.

    For each sample a random city and intent ("current_weather" or
    "forecast_weather") are chosen, the matching API helper is called, and a
    natural-language answer is rendered from the response.

    Args:
        num_samples: Number of samples to attempt.

    Returns:
        A list of dicts with the keys ``user_input``, ``intent_extraction``,
        ``api_response`` (the raw API payload) and ``assistant_response``.
        The list can be shorter than ``num_samples``: attempts whose API call
        fails, or whose forecast has no entry for the requested day, are skipped.
    """
    cities = [
        "Kolkata", "New York", "London", "Tokyo", "Sydney", "Paris", "Berlin", "Toronto",
        "Mumbai", "Shanghai", "Los Angeles", "Chicago", "Houston", "Phoenix", "Philadelphia",
        "San Antonio", "San Diego", "Dallas", "San Jose", "Austin"
    ]
    info_requests = ["current_weather", "forecast_weather"]
    dates = ["today", "tomorrow"] + [generate_random_future_date() for _ in range(8)]
    dataset = []

    for _ in range(num_samples):
        city = random.choice(cities)
        info_request = random.choice(info_requests)
        date = random.choice(dates) if info_request == "forecast_weather" else "today"
        user_prompt = f"What is the {info_request.replace('_', ' ')} in {city} {date}?"

        intent_extraction = {
            "intent": info_request,
            "entities": {
                "city": city,
                "date": date
            }
        }

        if info_request == "current_weather":
            api_response = get_weather(city)
        else:
            api_response = get_forecast(city)

        if api_response is None:
            # Failed lookup: nothing to build an answer from.
            continue

        if info_request == "current_weather":
            weather = api_response['weather'][0]['description']
            temp = api_response['main']['temp']
            wind_speed = api_response['wind']['speed']

            generated_response = (
                f"The weather in {city} is currently {weather} with a temperature of {temp}°C and "
                f"wind speed of {wind_speed} meters per second."
            )

        elif info_request == "forecast_weather":
            # Forecast entries are keyed by a timestamp string, so relative
            # labels must be turned into a calendar date before matching;
            # otherwise "today"/"tomorrow" requests never match and are lost.
            target_date = _resolve_forecast_date(date)
            matching_forecasts = [
                forecast for forecast in api_response['list']
                if forecast['dt_txt'].startswith(target_date)
            ]
            if not matching_forecasts:
                continue

            selected_forecast = matching_forecasts[0]
            forecast_temp = selected_forecast['main']['temp']
            forecast_weather = selected_forecast['weather'][0]['description']

            # "on today" would read badly; calendar dates keep the "on <date>" wording.
            when = date if date in ("today", "tomorrow") else f"on {date}"
            generated_response = (
                f"The forecast for {city} {when} is {forecast_weather} with a temperature of {forecast_temp}°C."
            )

        else:
            continue

        dataset.append({
            "user_input": user_prompt,
            "intent_extraction": intent_extraction,
            "api_response": api_response,
            "assistant_response": generated_response
        })

    return dataset

# Build the dataset using the default number of samples.
final_dataset = generate_dataset()

# Preview: print up to the first five records, one per line.
first_examples = final_dataset[:5]
for example in first_examples:
    print(example)


{'user_input': 'What is the current weather in Shanghai today?', 'intent_extraction': {'intent': 'current_weather', 'entities': {'city': 'Shanghai', 'date': 'today'}}, 'api_response': {'coord': {'lon': 121.4581, 'lat': 31.2222}, 'weather': [{'id': 803, 'main': 'Clouds', 'description': 'broken clouds', 'icon': '04n'}], 'base': 'stations', 'main': {'temp': 21, 'feels_like': 21.61, 'temp_min': 20.92, 'temp_max': 21.07, 'pressure': 1009, 'humidity': 94}, 'visibility': 10000, 'wind': {'speed': 2, 'deg': 160}, 'clouds': {'all': 75}, 'dt': 1717784684, 'sys': {'type': 2, 'id': 145096, 'country': 'CN', 'sunrise': 1717793386, 'sunset': 1717844216}, 'timezone': 28800, 'id': 1796236, 'name': 'Shanghai', 'cod': 200}, 'assistant_response': 'The weather in Shanghai is currently broken clouds with a temperature of 21°C and wind speed of 2 meters per second.'}
{'user_input': 'What is the current weather in Shanghai today?', 'intent_extraction': {'intent': 'current_weather', 'entities': {'city': 'Shangh

In [4]:
# Display the first generated record in full.
final_dataset[0]

{'user_input': 'What is the current weather in Shanghai today?',
 'intent_extraction': {'intent': 'current_weather',
  'entities': {'city': 'Shanghai', 'date': 'today'}},
 'api_response': {'coord': {'lon': 121.4581, 'lat': 31.2222},
  'weather': [{'id': 803,
    'main': 'Clouds',
    'description': 'broken clouds',
    'icon': '04n'}],
  'base': 'stations',
  'main': {'temp': 21,
   'feels_like': 21.61,
   'temp_min': 20.92,
   'temp_max': 21.07,
   'pressure': 1009,
   'humidity': 94},
  'visibility': 10000,
  'wind': {'speed': 2, 'deg': 160},
  'clouds': {'all': 75},
  'dt': 1717784684,
  'sys': {'type': 2,
   'id': 145096,
   'country': 'CN',
   'sunrise': 1717793386,
   'sunset': 1717844216},
  'timezone': 28800,
  'id': 1796236,
  'name': 'Shanghai',
  'cod': 200},
 'assistant_response': 'The weather in Shanghai is currently broken clouds with a temperature of 21°C and wind speed of 2 meters per second.'}

In [5]:
import random
import requests
from datetime import datetime, timedelta

# OpenWeatherMap API key.
# Prefer supplying it through the OPENWEATHERMAP_API_KEY environment variable so
# the secret does not have to live in the notebook.
# NOTE(security): the literal fallback below is already stored here in plain
# text; it should be rotated and the fallback removed once the environment
# variable is configured everywhere this notebook runs.
import os  # imported here so this self-contained cell runs without earlier cells

api_key = os.environ.get('OPENWEATHERMAP_API_KEY') or '337586e7326dcb828d7a386379093040'

# Function to get current weather
def get_weather(city):
    """Fetch the current weather for ``city`` from OpenWeatherMap.

    Args:
        city: City name, sent as the ``q`` query parameter.

    Returns:
        The decoded JSON payload when the API answers with HTTP 200,
        otherwise ``None`` (non-200 status, connection failure or timeout).
    """
    query = {"q": city, "appid": api_key, "units": "metric"}
    try:
        # HTTPS keeps the API key out of clear text; `params` percent-encodes
        # the city name; the timeout prevents an indefinite hang.
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params=query,
            timeout=10,
        )
    except requests.RequestException:
        # Transport-level problems are reported like any other failed lookup.
        return None
    return response.json() if response.status_code == 200 else None

# Function to get weather forecast
def get_forecast(city):
    """Fetch the multi-day forecast for ``city`` from OpenWeatherMap.

    Args:
        city: City name, sent as the ``q`` query parameter.

    Returns:
        The decoded JSON payload when the API answers with HTTP 200,
        otherwise ``None`` (non-200 status, connection failure or timeout).
    """
    query = {"q": city, "appid": api_key, "units": "metric"}
    try:
        # HTTPS keeps the API key out of clear text; `params` percent-encodes
        # the city name; the timeout prevents an indefinite hang.
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/forecast",
            params=query,
            timeout=10,
        )
    except requests.RequestException:
        # Transport-level problems are reported like any other failed lookup.
        return None
    return response.json() if response.status_code == 200 else None

# Generate random future date within the next 5 days
def generate_random_future_date():
    """Pick a day 1 to 5 days (inclusive) from now and return it as YYYY-MM-DD."""
    day_offset = random.randint(1, 5)
    return (datetime.now() + timedelta(days=day_offset)).strftime('%Y-%m-%d')

# Generate the dataset
def _resolve_forecast_date(date):
    """Map the relative labels 'today'/'tomorrow' to a YYYY-MM-DD string.

    Any other value is returned unchanged (the caller only passes strings that
    are already formatted as YYYY-MM-DD in that case).
    """
    day_offsets = {"today": 0, "tomorrow": 1}
    if date in day_offsets:
        # NOTE(review): this uses the local clock, like generate_random_future_date;
        # forecast 'dt_txt' values are presumably UTC - confirm if that matters.
        resolved = datetime.now() + timedelta(days=day_offsets[date])
        return resolved.strftime('%Y-%m-%d')
    return date


def generate_dataset(num_samples=5):
    """Build synthetic weather-assistant examples with condensed API summaries.

    For each sample a random city and intent ("current_weather" or
    "forecast_weather") are chosen, the matching API helper is called, and a
    natural-language answer is rendered from the response.

    Args:
        num_samples: Number of samples to attempt.

    Returns:
        A list of dicts with the keys ``user_input``, ``intent_extraction``,
        ``api_response`` (a small summary dict, not the raw payload) and
        ``assistant_response``. The list can be shorter than ``num_samples``:
        attempts whose API call returns nothing, or whose forecast has no
        entry for the requested day, are skipped.
    """
    cities = [
        "Kolkata", "New York", "London", "Tokyo", "Sydney", "Paris", "Berlin", "Toronto",
        "Mumbai", "Shanghai", "Los Angeles", "Chicago", "Houston", "Phoenix", "Philadelphia",
        "San Antonio", "San Diego", "Dallas", "San Jose", "Austin"
    ]
    info_requests = ["current_weather", "forecast_weather"]
    dates = ["today", "tomorrow"] + [generate_random_future_date() for _ in range(8)]
    dataset = []

    for _ in range(num_samples):
        city = random.choice(cities)
        info_request = random.choice(info_requests)
        date = random.choice(dates) if info_request == "forecast_weather" else "today"
        user_prompt = f"What is the {info_request.replace('_', ' ')} in {city} {date}?"

        intent_extraction = {
            "intent": info_request,
            "entities": {
                "city": city,
                "date": date
            }
        }

        if info_request == "current_weather":
            api_response = get_weather(city)
            if not api_response:
                continue

            weather = api_response['weather'][0]['description']
            temp = api_response['main']['temp']
            wind_speed = api_response['wind']['speed']
            humidity = api_response['main']['humidity']

            generated_response = (
                f"The weather in {city} is currently {weather} with a temperature of {temp}°C, "
                f"wind speed of {wind_speed} meters per second, and humidity of {humidity}%."
            )

            dataset.append({
                "user_input": user_prompt,
                "intent_extraction": intent_extraction,
                "api_response": {
                    "location": f"{city}, {api_response['sys']['country']}",
                    "temperature": temp,
                    "description": weather,
                    "wind_speed": wind_speed,
                    "humidity": humidity
                },
                "assistant_response": generated_response
            })

        elif info_request == "forecast_weather":
            api_response = get_forecast(city)
            if not api_response:
                continue

            # Forecast entries are keyed by a timestamp string, so relative
            # labels must be turned into a calendar date before matching;
            # otherwise "today"/"tomorrow" requests never match and are lost.
            target_date = _resolve_forecast_date(date)
            matching_forecasts = [
                forecast for forecast in api_response['list']
                if forecast['dt_txt'].startswith(target_date)
            ]
            if not matching_forecasts:
                continue

            selected_forecast = matching_forecasts[0]
            forecast_temp = selected_forecast['main']['temp']
            forecast_weather = selected_forecast['weather'][0]['description']
            wind_speed = selected_forecast['wind']['speed']
            humidity = selected_forecast['main']['humidity']

            # "on today" would read badly; calendar dates keep the "on <date>" wording.
            when = date if date in ("today", "tomorrow") else f"on {date}"
            generated_response = (
                f"The forecast for {city} {when} is {forecast_weather} with a temperature of {forecast_temp}°C, "
                f"wind speed of {wind_speed} meters per second, and humidity of {humidity}%."
            )

            dataset.append({
                "user_input": user_prompt,
                "intent_extraction": intent_extraction,
                "api_response": {
                    # Calendar date the summarized forecast entry belongs to.
                    "date": target_date,
                    "temperature": forecast_temp,
                    "description": forecast_weather,
                    "wind_speed": wind_speed,
                    "humidity": humidity
                },
                "assistant_response": generated_response
            })

    return dataset

# Build the dataset using the default number of samples.
final_dataset = generate_dataset()

# Preview: print up to the first five records, one per line.
first_examples = final_dataset[:5]
for example in first_examples:
    print(example)

{'user_input': 'What is the forecast weather in Los Angeles 2024-06-11?', 'intent_extraction': {'intent': 'forecast_weather', 'entities': {'city': 'Los Angeles', 'date': '2024-06-11'}}, 'api_response': {'date': '2024-06-11', 'temperature': 24.4, 'description': 'broken clouds', 'wind_speed': 4.07, 'humidity': 44}, 'assistant_response': 'The forecast for Los Angeles on 2024-06-11 is broken clouds with a temperature of 24.4°C, wind speed of 4.07 meters per second, and humidity of 44%.'}
{'user_input': 'What is the current weather in Paris today?', 'intent_extraction': {'intent': 'current_weather', 'entities': {'city': 'Paris', 'date': 'today'}}, 'api_response': {'location': 'Paris, FR', 'temperature': 21.57, 'description': 'clear sky', 'wind_speed': 3.6, 'humidity': 43}, 'assistant_response': 'The weather in Paris is currently clear sky with a temperature of 21.57°C, wind speed of 3.6 meters per second, and humidity of 43%.'}
{'user_input': 'What is the forecast weather in Toronto 2024-06