In [None]:
import requests
import pandas as pd
from pymongo import MongoClient
from datetime import datetime
from datetime import date
import json

In [None]:
# URL template for fetching event data; "{offset}" is substituted per page.
# NOTE(review): the license key is embedded in the URL -- consider loading it
# from configuration instead of keeping it in source.
url_template = "https://meta.et4.de/rest.ashx/search/?type=Event&experience=kiel&mkt=de&maxresponsetime=0&licensekey=t1.eyJhdWQiOiJwYWdlcyIsImV4cCI6MTczMjA3ODg1Mn0.n5x34HI7ACWNlPaFN6LXTSDXdrI&cause=pages.finder&q=all%3Aall+-systag%3Ahas_abnormal_interval&mode=next_months%2C3&sort=start+asc&unrollIntervals=true&offset={offset}&limit=100&template=ET2014A_LIGHT.json"

# Set up headers and payload (empty in this case)
payload = {}
headers = {}

# Initialize variables
all_events = []
offset = 0
limit = 100  # Page size; mirrors the "limit=100" query parameter in url_template
request_timeout = 30  # Seconds; without a timeout requests may block indefinitely

# Loop through the pages of results until an empty page or an error is seen
while True:
    # Generate the current URL with the correct offset
    url = url_template.format(offset=offset)

    # Send GET request; a network failure ends paging but keeps what was
    # already collected instead of aborting the whole run.
    try:
        response = requests.get(url, headers=headers, data=payload, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data. Request error: {exc}")
        break

    # Anything other than HTTP 200 also ends paging
    if response.status_code != 200:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        break

    event_data = response.json()  # Parse the JSON response

    # Extract the list of events
    events = event_data.get('items', [])

    # If no events were returned, the last page has been passed
    if not events:
        break

    # Append the events to the all_events list and move to the next page
    all_events.extend(events)
    offset += limit

# Convert the list of all events into a DataFrame (nested dicts are flattened
# into dotted column names by json_normalize)
df = pd.json_normalize(all_events)

# Save the DataFrame as line-delimited JSON (one record per line)
df.to_json('Events_Kiel.json', orient='records', lines=True)

print("Data has been saved to 'Events_Kiel.json'")

# Location of the line-delimited JSON file to load
file_path = 'Events_Kiel.json'

# Parsed event records, one per successfully decoded line
all_events = []

# Walk the file one line at a time; each line is expected to be one JSON object
with open(file_path, 'r', encoding='utf-8') as file:
    for raw_line in file:
        try:
            event = json.loads(raw_line.strip())
        except json.JSONDecodeError as e:
            # Report the bad line and carry on with the rest of the file
            print(f"Error decoding JSON: {e}")
        else:
            all_events.append(event)

# Define a function to extract relevant fields from each JSON object
def _split_iso_timestamp(timestamp):
    """Split an ISO 8601 timestamp string into ``(date, "HH:MM")``.

    Returns ``(None, None)`` when *timestamp* is missing or empty. The
    wall-clock time is taken as written in the string; no timezone
    conversion is performed.

    Raises:
        ValueError: if *timestamp* is not a valid ISO 8601 string.
    """
    if not timestamp:
        return None, None
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
    # on, so normalise it to an explicit UTC offset first.
    if timestamp.endswith(("Z", "z")):
        timestamp = timestamp[:-1] + "+00:00"
    parsed = datetime.fromisoformat(timestamp)
    return parsed.date(), parsed.strftime("%H:%M")


def extract_relevant_info(event):
    """Reduce one event record to the flat set of fields stored downstream.

    Args:
        event: dict describing a single event. Nested values are looked up
            with dotted keys (e.g. ``"geo.main.latitude"``), i.e. the record
            is expected to be already flattened.

    Returns:
        dict with the keys ``global_id``, ``title``, ``type``, ``categories``
        (comma-separated string), ``teaser_text``, ``country``, ``city``,
        ``zip``, ``street``, ``phone``, ``image_url``, ``latitude``,
        ``longitude``, ``start_date``, ``end_date`` (``datetime.date`` or
        ``None``), ``start_time_no_tz`` and ``end_time_no_tz`` (``"HH:MM"``
        or ``None``).

    Raises:
        ValueError: if a start/end timestamp is not a valid ISO 8601 string.
    """
    # A key that is present with a null value makes ``.get(key, [])`` return
    # None rather than the default, so fall back with ``or []`` instead.
    intervals = event.get("timeIntervals") or []
    categories = event.get("categories") or []
    texts = event.get("texts") or []
    media_objects = event.get("media_objects") or []

    # Only the first time interval is used for the start/end fields.
    first_interval = next(iter(intervals), None) or {}
    start_date, start_time_no_tz = _split_iso_timestamp(first_interval.get("start"))
    end_date, end_time_no_tz = _split_iso_timestamp(first_interval.get("end"))

    # First text flagged as the teaser, and first media object flagged as default.
    teaser_text = next(
        (text.get("value") for text in texts if text.get("rel") == "teaser"),
        None,
    )
    image_url = next(
        (media.get("url") for media in media_objects if media.get("rel") == "default"),
        None,
    )

    # Return the extracted data as a dictionary
    return {
        "global_id": event.get("global_id"),
        "title": event.get("title"),
        "type": event.get("type"),
        "categories": ", ".join(categories),  # Join categories into a single string
        "teaser_text": teaser_text,
        "country": event.get("country"),
        "city": event.get("city"),
        "zip": event.get("zip"),
        "street": event.get("street"),
        "phone": event.get("phone"),
        "image_url": image_url,
        "latitude": event.get("geo.main.latitude"),
        "longitude": event.get("geo.main.longitude"),
        "start_date": start_date,
        "end_date": end_date,
        "start_time_no_tz": start_time_no_tz,
        "end_time_no_tz": end_time_no_tz,
    }

# Run the extractor over every loaded event record
relevant_info = list(map(extract_relevant_info, all_events))

# Build a table from the extracted records
df = pd.DataFrame(relevant_info)

# Persist the table as line-delimited JSON (one record per line)
df.to_json('Relevant_Events_Kiel.json', orient='records', lines=True)

print("Relevant information has been extracted and saved to 'Relevant_Events_Kiel.json'.")


# Convert any datetime.date objects to strings
def convert_dates_to_strings(df):
    """Render date values in ``start_date`` / ``end_date`` as ``YYYY-MM-DD`` strings.

    The columns are rewritten in place on *df*; columns that are absent are
    skipped. Values that are not ``datetime.date`` instances (strings, None,
    NaN, NaT) are left unchanged.

    Args:
        df: pandas DataFrame, optionally containing ``start_date`` and/or
            ``end_date`` columns.

    Returns:
        The same DataFrame object that was passed in.
    """
    def _to_iso_day(value):
        # pd.NaT passes the isinstance check (it derives from datetime) but
        # raises on strftime(), so it has to be excluded explicitly.
        if isinstance(value, date) and value is not pd.NaT:
            return value.strftime('%Y-%m-%d')
        return value

    for column in ('start_date', 'end_date'):
        if column in df.columns:
            df[column] = df[column].apply(_to_iso_day)
    return df

# Apply the conversion to your DataFrame so the date columns hold plain strings
df = convert_dates_to_strings(df)

# Connect to the MongoDB server running in Docker
client = MongoClient('mongodb://localhost:27017')  # Adjust URL if necessary

# Access your database (create it if it doesn't exist)
db = client['Kiel_Events_Calendar']  # Replace with your actual database name

# Access your collection (create it if it doesn't exist)
collection = db['Events_collection']  # Replace with your actual collection name

# Check if the DataFrame is not empty before touching the collection
if not df.empty:
    # Convert the DataFrame to a list of dictionaries
    data = df.to_dict(orient='records')

    # Remove existing documents to avoid duplication. This is done only once
    # there is replacement data, so an empty/failed fetch cannot wipe the
    # collection and leave it empty.
    collection.delete_many({})

    # Insert the data into the collection
    result = collection.insert_many(data)
    print(f"Inserted {len(result.inserted_ids)} documents into the collection.")
else:
    # Existing documents are intentionally left in place in this case
    print("The DataFrame is empty. No data to insert.")

# Print the first 5 documents in the collection to verify insertion
for doc in collection.find().limit(5):  # Adjust the limit as necessary
    print(doc)