# Course: Data Engineering
# **Practical Exercise: Integrating Weather Data Into a Sales Dataset Using APIs and MongoDB**
# Prepared by: Georges Assaf



<a href="https://colab.research.google.com/github/gassaf2/DataEngineering/blob/main/week3/Practical Exercise Integrating Weather Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import pandas as pd


# Step 1: Extract Sales Data from CSV

The first step is to extract the sales data from the CSV file. This will serve as the main
dataset to which we will add weather data.

In [2]:
# Read the raw sales records; the weather columns are added to this frame later
sales_csv_path = './sample_data/sales_data.csv'
sales_data = pd.read_csv(sales_csv_path)

# Preview the top of the frame to confirm the file was parsed
sales_data.head()


Unnamed: 0,date,product_id,sales_amount,store_location
0,2025-02-05,P001,150,New York
1,2025-02-05,P002,300,Los Angeles
2,2025-02-05,P003,450,Chicago
3,2025-02-05,P004,600,Houston
4,2025-02-05,P005,750,Seattle


# Step 2: Fetch Weather Data from the API

Next, use the OpenWeatherMap API to fetch weather data for each store location on the
corresponding transaction date.<br>API Setup:<br>
• Sign up for an OpenWeatherMap API key at OpenWeatherMap.<br>
• Use the requests library in Python to pull weather data.

In [14]:
def fetch_weather_data(city, date, api_key):
    """Fetch weather for a city from the OpenWeatherMap ``/data/2.5/weather`` endpoint.

    Args:
        city: City name sent as the ``q`` query parameter (e.g. "New York").
        date: Kept for interface compatibility but not used; no date is sent
            with the request, so the result does not depend on this value.
        api_key: OpenWeatherMap API key sent as the ``appid`` query parameter.

    Returns:
        A ``(temperature, humidity, weather_description)`` tuple. The
        temperature is converted from Kelvin to degrees Celsius.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status
            (for example an invalid key or an unknown city).
        requests.Timeout: If the server does not answer within the timeout.
    """
    base_url = "https://api.openweathermap.org/data/2.5/weather"
    # Let requests build the query string so that city names containing
    # spaces or reserved characters are percent-encoded correctly.
    response = requests.get(
        base_url,
        params={"q": city, "appid": api_key},
        timeout=10,  # without a timeout a stalled connection blocks forever
    )
    # Fail with the HTTP status rather than a KeyError on the error payload.
    response.raise_for_status()
    data = response.json()
    # Extract temperature, humidity, and weather description
    temperature = data['main']['temp'] - 273.15  # Convert from Kelvin to Celsius
    humidity = data['main']['humidity']
    weather_description = data['weather'][0]['description']
    return temperature, humidity, weather_description

In [17]:
import os

# Read the OpenWeatherMap key from the environment so that no credential is
# stored in the notebook.
# NOTE(review): the keys previously hard-coded in this cell were committed
# with the notebook and should be revoked.
api_key = os.environ.get("OPENWEATHERMAP_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENWEATHERMAP_API_KEY environment variable before running this cell."
    )

# Smoke test for a single city. The date argument is accepted by
# fetch_weather_data but does not influence the request it sends.
temp, humidity, description = fetch_weather_data('New York', '2025-02-02', api_key)
print(f"Temp: {temp}, Humidity: {humidity}, Weather: {description}")


Temp: -2.069999999999993, Humidity: 49, Weather: overcast clouds


# Step 3: Combine Weather Data with Sales Data

In [21]:
# Add weather columns to every row of sales_data.
# Many rows share the same store location and date, so each distinct
# (location, date) pair is fetched once and reused; this avoids one HTTP
# request per row and keeps identical pairs consistent with each other.
weather_cache = {}
for index, row in sales_data.iterrows():
    location, sale_date = row["store_location"], row["date"]
    cache_key = (location, sale_date)
    if cache_key not in weather_cache:
        weather_cache[cache_key] = fetch_weather_data(location, sale_date, api_key)
    temp, humidity, description = weather_cache[cache_key]
    sales_data.at[index, "Temperature (°C)"] = temp
    sales_data.at[index, "Humidity (%)"] = humidity
    sales_data.at[index, "Weather Description"] = description

In [22]:
# Display the sales DataFrame, now including the weather columns filled in by the loop above
sales_data

Unnamed: 0,date,product_id,sales_amount,store_location,Temperature (°C),Humidity (%),Weather Description
0,2025-02-05,P001,150,New York,-1.83,49.0,overcast clouds
1,2025-02-05,P002,300,Los Angeles,11.84,96.0,mist
2,2025-02-05,P003,450,Chicago,-2.77,74.0,overcast clouds
3,2025-02-05,P004,600,Houston,21.47,93.0,overcast clouds
4,2025-02-05,P005,750,Seattle,-0.86,92.0,snow
5,2025-02-05,P001,900,New York,-1.83,49.0,overcast clouds
6,2025-02-05,P002,950,Los Angeles,11.84,96.0,mist
7,2025-02-06,P001,150,New York,-1.83,49.0,overcast clouds
8,2025-02-06,P002,300,Los Angeles,11.84,96.0,mist
9,2025-02-06,P003,450,Chicago,-2.77,74.0,overcast clouds


# Step 4: Load the Integrated Data into MongoDB

Finally, you will load the integrated sales and weather data into MongoDB for future analysis.

In [24]:
import os
from datetime import datetime

from pymongo import MongoClient

# The Atlas connection string embeds the database username and password, so
# it is read from the environment instead of being written into the notebook.
connection_string = os.environ.get("MONGODB_URI")
if not connection_string:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB Atlas connection string."
    )

# Connect to the MongoDB Atlas cluster
client = MongoClient(connection_string)

# Access a specific database
db = client['weather']

In [26]:
# Access the "sales" collection within the database
sales = db['sales']

# Convert the DataFrame into a list with one dict per row, the shape
# insert_many expects
sales_dict = sales_data.to_dict(orient="records")

# insert_many rejects an empty document list, so skip the write when the
# DataFrame has no rows instead of letting the cell fail.
insert_result = sales.insert_many(sales_dict) if sales_dict else None
insert_result

InsertManyResult([ObjectId('67a4241c0502cf80e8f336fc'), ObjectId('67a4241c0502cf80e8f336fd'), ObjectId('67a4241c0502cf80e8f336fe'), ObjectId('67a4241c0502cf80e8f336ff'), ObjectId('67a4241c0502cf80e8f33700'), ObjectId('67a4241c0502cf80e8f33701'), ObjectId('67a4241c0502cf80e8f33702'), ObjectId('67a4241c0502cf80e8f33703'), ObjectId('67a4241c0502cf80e8f33704'), ObjectId('67a4241c0502cf80e8f33705'), ObjectId('67a4241c0502cf80e8f33706'), ObjectId('67a4241c0502cf80e8f33707'), ObjectId('67a4241c0502cf80e8f33708'), ObjectId('67a4241c0502cf80e8f33709'), ObjectId('67a4241c0502cf80e8f3370a'), ObjectId('67a4241c0502cf80e8f3370b'), ObjectId('67a4241c0502cf80e8f3370c'), ObjectId('67a4241c0502cf80e8f3370d'), ObjectId('67a4241c0502cf80e8f3370e'), ObjectId('67a4241c0502cf80e8f3370f'), ObjectId('67a4241c0502cf80e8f33710'), ObjectId('67a4241c0502cf80e8f33711'), ObjectId('67a4241c0502cf80e8f33712'), ObjectId('67a4241c0502cf80e8f33713'), ObjectId('67a4241c0502cf80e8f33714'), ObjectId('67a4241c0502cf80e8f337