# Load the Data Through the API

In [2]:
import pandas as pd
from datetime import datetime
import boto3
import requests
import json
import time

In [57]:
# pip install boto3

In [48]:
def get_weather_data(api_key, city_list, timeout=10):
    """
    Fetch the raw current-weather payload for each city in ``city_list``.

    Cities are queried one at a time against the WeatherAPI.com
    ``/current.json`` endpoint. A city whose request fails is reported on
    stdout and skipped, so one bad city does not abort the whole run.

    Args:
        api_key (str): Your WeatherAPI.com API key.
        city_list (list): A list of city names (strings).
        timeout (float): Seconds to wait on each HTTP request before giving
            up on that city. Defaults to 10.

    Returns:
        list: The decoded JSON response for every city that was fetched
              successfully, in request order. The list is empty when
              ``city_list`` is empty or every request failed; this
              function never returns None.
    """
    # HTTPS so the API key (sent as a query parameter) is not transmitted in clear text.
    url = "https://api.weatherapi.com/v1/current.json"

    all_weather_data = []

    for city in city_list:
        params = {"key": api_key, "q": city}

        try:
            print(f"Fetching data for {city}...")
            # Without a timeout a stalled connection would block the cell indefinitely.
            response = requests.get(url, params=params, timeout=timeout)

            # Raise an exception for HTTP errors (4xx or 5xx)
            response.raise_for_status()

            all_weather_data.append(response.json())
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            message = str(e)
            if api_key:
                # requests error messages embed the full request URL, which
                # contains the key; redact it before it lands in cell output.
                message = message.replace(str(api_key), "***")
            print(f"Error fetching data for {city}: {message}")
            continue  # Move to the next city on error

        # Be polite to the API and avoid hitting rate limits
        # (only reached after a successful fetch).
        time.sleep(1)

    return all_weather_data

# --- Main execution ---
if __name__ == "__main__":
    import os  # local import: only this cell needs it

    # Read the WeatherAPI.com key from the environment instead of embedding
    # it in the notebook, where it would be shared with every copy of the file.
    # NOTE(review): any key that was previously committed in this cell should
    # be revoked and replaced.
    api_key = os.environ.get("WEATHERAPI_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the WEATHERAPI_KEY environment variable to your WeatherAPI.com API key."
        )

    # Cities to fetch current weather for
    cities = ['Mumbai', 'New Delhi', 'Bangalore', 'Chennai', 'Kolkata', 'Hyderabad', 'Ahmedabad', 'Pune']

    weather_data_list = get_weather_data(api_key, cities)

    if weather_data_list:
        # Compact single-line JSON string of all responses.
        # raw_json_output is only assigned when at least one city was fetched.
        raw_json_output = json.dumps(weather_data_list, indent=None)

        print("\n--- Raw Weather Data Output ---")
        print(raw_json_output)

        # Same data, indented for easier inspection
        print("\n--- Readable Raw Data (for inspection) ---")
        print(json.dumps(weather_data_list, indent=4))

        # Example of how to access data for a specific city from the list.
        # The match is against the location name reported in the response,
        # so the example city must be one that was actually requested.
        example_city = "Mumbai"
        print(f"\n--- Example: Accessing data for {example_city} ---")
        for data in weather_data_list:
            if data["location"]["name"] == example_city:
                print(json.dumps(data, indent=4))
                break
    else:
        print("Failed to retrieve any weather data.")

Fetching data for Mumbai...
Fetching data for New Delhi...
Fetching data for Bangalore...
Fetching data for Chennai...
Fetching data for Kolkata...
Fetching data for Hyderabad...
Fetching data for Ahmedabad...
Fetching data for Pune...

--- Raw Weather Data Output ---
[{"location": {"name": "Mumbai", "region": "Maharashtra", "country": "India", "lat": 18.975, "lon": 72.826, "tz_id": "Asia/Kolkata", "localtime_epoch": 1757774708, "localtime": "2025-09-13 20:15"}, "current": {"last_updated_epoch": 1757774700, "last_updated": "2025-09-13 20:15", "temp_c": 26.1, "temp_f": 79.0, "is_day": 0, "condition": {"text": "Light rain", "icon": "//cdn.weatherapi.com/weather/64x64/night/296.png", "code": 1183}, "wind_mph": 6.7, "wind_kph": 10.8, "wind_degree": 308, "wind_dir": "NW", "pressure_mb": 1008.0, "pressure_in": 29.77, "precip_mm": 0.0, "precip_in": 0.0, "humidity": 89, "cloud": 75, "feelslike_c": 28.9, "feelslike_f": 83.9, "windchill_c": 27.4, "windchill_f": 81.4, "heatindex_c": 31.4, "heatin

In [76]:
# raw_json_output_json=json.loads(raw_json_output)

# Dump To S3

In [3]:
# Name of today's dump file, stamped with the current local date (YYYY-MM-DD).
file_name = f"output_{datetime.now():%Y-%m-%d}.json"

In [87]:
# S3 client plus the destination bucket, folder prefix and object key
# used for the raw JSON dump.
s3 = boto3.client("s3")

bucket_name = "jibin.spotify.dump"
folder_name = "raw_json"

# File name carries the current local date, e.g. output_2025-09-14.json
file_name = "output_" + datetime.now().strftime("%Y-%m-%d") + ".json"

# Object key is "<folder>/<file>"
s3_file_key = "/".join((folder_name, file_name))

In [88]:
# Upload the compact JSON string as a single object. The call is left as the
# cell's last expression so the S3 response is displayed as the cell output.
s3.put_object(
    Bucket=bucket_name,
    Key=s3_file_key,
    Body=raw_json_output,
    ContentType="application/json",
)

{'ResponseMetadata': {'RequestId': '8YHRKH48NEQYW454',
  'HostId': 'WQNc4antWSizgejGanzilvlnqLGIPVP3b8zMxwRGj8sp1w2Fb1VVIPrGs/DsDzTLCzlpOGJIo/NWnOlqhMWYv8AVusWRR3/PkXG2yaKx0uk=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'WQNc4antWSizgejGanzilvlnqLGIPVP3b8zMxwRGj8sp1w2Fb1VVIPrGs/DsDzTLCzlpOGJIo/NWnOlqhMWYv8AVusWRR3/PkXG2yaKx0uk=',
   'x-amz-request-id': '8YHRKH48NEQYW454',
   'date': 'Sun, 14 Sep 2025 06:40:00 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"120e5188643a44b6b4d82542eaf8f861"',
   'x-amz-checksum-crc32': 'y21DQw==',
   'x-amz-checksum-type': 'FULL_OBJECT',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"120e5188643a44b6b4d82542eaf8f861"',
 'ChecksumCRC32': 'y21DQw==',
 'ChecksumType': 'FULL_OBJECT',
 'ServerSideEncryption': 'AES256'}