# Database Operations

This notebook provides interactive access to the Neon database using our CRUD operations.

## Setup

In [5]:
import asyncio
import nest_asyncio
from datetime import datetime, timezone
from src.database import (
    get_db,
    get_podcast_by_id,
    get_podcast_by_rss_url,
    create_podcast,
    list_podcasts,
    get_episode_by_guid,
    create_episode,
    get_podcast_episodes,
    get_recent_episodes
)

# Allow nested event loops in Jupyter: run_async() below calls
# loop.run_until_complete() on a loop that may already be running, which plain
# asyncio rejects unless nest_asyncio has patched it.
nest_asyncio.apply()

async def run_db_operation(operation):
    """Await ``operation`` with the database handle yielded by ``get_db()``.

    ``operation`` is called with that handle and must return an awaitable;
    its awaited result is handed back to the caller.
    """
    async with get_db() as handle:
        outcome = await operation(handle)
        return outcome

def run_async(coro):
    """Run an awaitable to completion from synchronous code and return its result.

    If an event loop is already running in this thread, the awaitable is driven
    on that loop; calling ``run_until_complete`` on a running loop only works
    because ``nest_asyncio.apply()`` has been executed.

    If no loop is running, a fallback loop is created once, registered as the
    current loop, cached on this function and reused by later calls. Creating
    a fresh loop on every call would leave each earlier loop open and
    unreachable.

    Exceptions raised by the awaitable propagate to the caller.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: reuse the cached fallback loop,
        # creating it only the first time (or again if it was closed).
        loop = getattr(run_async, "_fallback_loop", None)
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            run_async._fallback_loop = loop
    return loop.run_until_complete(coro)

### Helper Functions for Async Operations

In [6]:
async def run_db_operation(operation):
    """Call ``operation`` with the handle from ``get_db()`` and await what it returns."""
    async with get_db() as db_handle:
        pending = operation(db_handle)
        return await pending

def run_async(coro):
    """Run an awaitable to completion from synchronous code and return its result.

    If an event loop is already running in this thread, the awaitable is driven
    on that loop; this relies on ``nest_asyncio`` having patched asyncio so
    that ``run_until_complete`` may be called re-entrantly.

    If no loop is running, a fallback loop is created explicitly, registered
    as the current loop, cached on this function and reused by later calls.
    This avoids depending on ``asyncio.get_event_loop()`` to create a loop
    implicitly, which is deprecated.

    Exceptions raised by the awaitable propagate to the caller.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: reuse the cached fallback loop,
        # creating it only the first time (or again if it was closed).
        loop = getattr(run_async, "_fallback_loop", None)
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            run_async._fallback_loop = loop
    return loop.run_until_complete(coro)

## Podcast Operations

### List All Podcasts

In [None]:
def _podcasts_with_counts(db):
    """Call list_podcasts with with_episode_count=True on the given handle."""
    return list_podcasts(db, with_episode_count=True)

# Fetch every podcast, then print a four-line summary per entry
podcasts = run_async(run_db_operation(_podcasts_with_counts))

for p in podcasts:
    # Entries are read with dict-style access; a missing episode_count shows as 0
    print(f"\nPodcast: {p['name']}")
    print(f"ID: {p['id']}")
    print(f"Episodes: {p.get('episode_count', 0)}")
    print(f"RSS: {p['rss_url']}")

### Find Podcast by RSS URL

In [None]:
RSS_URL = "https://techtalkweekly.com/feed"  # feed to look up; replace with your RSS URL

def _podcast_for_rss_url(db):
    """Look up the podcast stored under RSS_URL (read when the lookup runs)."""
    return get_podcast_by_rss_url(db, RSS_URL)

podcast = run_async(run_db_operation(_podcast_for_rss_url))
# A falsy result means the lookup found nothing for this feed
print(f"Found: {podcast.name} (ID: {podcast.id})" if podcast else "Podcast not found")

### Create New Podcast

In [None]:
new_podcast_data = {
    "name": "My Test Podcast",
    "publisher": "Test Publisher",
    "description": "A test podcast",
    "rss_url": "https://test.com/feed",
    "image_url": "https://test.com/image.png",
    "tags": ["test", "demo"]
}

async def create_new_podcast(db, data):
    """Create a podcast from ``data``, commit the change and return the podcast."""
    podcast = await create_podcast(db, data)
    await db.commit()
    return podcast

async def get_or_create_podcast(db, data):
    """Return ``(podcast, created)``, creating only when the RSS URL is unknown.

    Re-running this cell would otherwise try to create the same feed a second
    time. The lookup mirrors the existence check the CSV import performs
    before calling ``create_podcast``.
    """
    existing = await get_podcast_by_rss_url(db, data["rss_url"])
    if existing:
        return existing, False
    return await create_new_podcast(db, data), True

new_podcast, was_created = run_async(
    run_db_operation(lambda db: get_or_create_podcast(db, new_podcast_data))
)
status = "Created" if was_created else "Already exists"
print(f"{status}: {new_podcast.name} (ID: {new_podcast.id})")

### Upload Podcasts from CSV

In [16]:
import pandas as pd
import ast
from datetime import datetime
import pytz

# Read the CSV file. The path is relative, so it resolves against the kernel's
# working directory. The import below reads these columns from each row:
# name, publisher, description, rss_url, image_url, tags, frequency.
df = pd.read_csv('../../podcasts.csv')

# Function to safely evaluate string representation of list
def parse_tags(tags_str):
    """Parse a CSV ``tags`` cell holding a Python-literal list, e.g. "['a', 'b']".

    Returns the value produced by ``ast.literal_eval`` for a well-formed cell,
    and an empty list when the cell is missing (None/NaN) or cannot be parsed
    as a Python literal.
    """
    try:
        return ast.literal_eval(tags_str) if pd.notna(tags_str) else []
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only the failures literal_eval documents for malformed input are
        # treated as "no tags"; KeyboardInterrupt and SystemExit still propagate.
        return []

# Process each podcast
async def import_podcasts(db):
    """Create a podcast for every row of the global ``df`` that is not stored yet.

    A row is skipped when it has no RSS URL or when
    ``get_podcast_by_rss_url`` already finds a podcast for that URL. Missing
    optional cells (publisher, description, image_url, frequency) are stored
    as None rather than as pandas' float NaN. A failure while creating one
    podcast is printed and the loop continues; all changes are committed once
    at the end.

    NOTE(review): if ``create_podcast`` fails in a way that invalidates the
    session, the final commit may fail as well - confirm against
    ``create_podcast`` / ``get_db``.
    """
    def _none_if_missing(value):
        """Map a missing CSV cell (NaN/None) to None; pass other values through."""
        return value if pd.notna(value) else None

    for _, row in df.iterrows():
        # A row without a feed URL cannot be looked up or imported
        if pd.isna(row['rss_url']):
            print(f"Skipping row without RSS URL: {row['name']}")
            continue

        # Check if podcast already exists
        existing = await get_podcast_by_rss_url(db, row['rss_url'])
        if existing:
            print(f"Skipping existing podcast: {row['name']}")
            continue

        # Prepare podcast data
        podcast_data = {
            'name': row['name'],
            'publisher': _none_if_missing(row['publisher']),
            'description': _none_if_missing(row['description']),
            'rss_url': row['rss_url'],
            'image_url': _none_if_missing(row['image_url']),
            'tags': parse_tags(row['tags']),
            'frequency': str(row['frequency']) if pd.notna(row['frequency']) else None,
            'created_at': datetime.now(pytz.UTC)
        }

        # Create the podcast
        try:
            await create_podcast(db, podcast_data)
            print(f"Created podcast: {podcast_data['name']}")
        except Exception as e:
            print(f"Error creating podcast {podcast_data['name']}: {str(e)}")

    # Commit all changes
    await db.commit()

# Run the import. import_podcasts has no return statement, so `result` is
# None; progress is reported through the print calls inside the function.
result = run_async(run_db_operation(import_podcasts))
print("\nImport completed!")

Created podcast: All-In
Created podcast: Call Her Daddy
Created podcast: Conan O'Brien Needs a Friend
Created podcast: The Daily
Created podcast: The Ezra Klein Show
Created podcast: Fresh Air
Created podcast: Hard Fork
Created podcast: Huberman Lab
Created podcast: The Joe Rogan Experience
Created podcast: Lex Fridman Podcast
Created podcast: The Megyn Kelly Show
Created podcast: The Mel Robbins Podcast
Created podcast: New Heights With Jason & Travis Kelce
Created podcast: Newsroom Robots
Created podcast: Pod Save America
Created podcast: Talking Headways
Created podcast: This Past Weekend w/ Theo Von
Created podcast: The Weekly Show with Jon Stewart
Created podcast: WTF with Marc Maron

Import completed!


### Update Podcasts from podcasts.csv

In [18]:
import pandas as pd
import ast
from datetime import datetime
import pytz
import asyncio

# Read the CSV file. The path is relative, so it resolves against the kernel's
# working directory. The update below reads these columns from each row:
# name, rss_url, publisher, description, image_url, tags, frequency.
df = pd.read_csv('../../podcasts.csv')

# Function to safely evaluate string representation of list
def parse_tags(tags_str):
    """Parse a CSV ``tags`` cell holding a Python-literal list, e.g. "['a', 'b']".

    Returns the value produced by ``ast.literal_eval`` for a well-formed cell,
    and an empty list when the cell is missing (None/NaN) or cannot be parsed
    as a Python literal.
    """
    try:
        return ast.literal_eval(tags_str) if pd.notna(tags_str) else []
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only the failures literal_eval documents for malformed input are
        # treated as "no tags"; KeyboardInterrupt and SystemExit still propagate.
        return []

# Process each podcast
async def update_podcasts(db):
    """Sync stored podcasts with the rows of the global ``df`` and commit once.

    Each row is matched to a stored podcast via ``get_podcast_by_rss_url``;
    rows without a match are skipped. The fields name, publisher, description,
    image_url, tags and frequency are compared with the stored attributes.
    Differing ones are overwritten and ``updated_at`` is refreshed on every
    podcast that changed.

    Missing CSV cells are normalised to None before comparing. pandas reads
    them as NaN, and NaN is unequal to every value including itself, so an
    unnormalised missing description or image_url would be reported and
    rewritten as a change on every run.

    Returns:
        The number of podcasts that were modified.
    """
    def _none_if_missing(value):
        """Map a missing CSV cell (NaN/None) to None; pass other values through."""
        return value if pd.notna(value) else None

    updated_count = 0
    print("Starting podcast updates...")

    for _, row in df.iterrows():
        print(f"Processing {row['name']}...")
        # Check if podcast exists
        existing = await get_podcast_by_rss_url(db, row['rss_url'])
        if not existing:
            print(f"Skipping non-existent podcast: {row['name']}")
            continue

        # Prepare podcast data
        podcast_data = {
            'name': row['name'],
            'publisher': _none_if_missing(row['publisher']),
            'description': _none_if_missing(row['description']),
            'image_url': _none_if_missing(row['image_url']),
            'tags': parse_tags(row['tags']),
            'frequency': str(row['frequency']) if pd.notna(row['frequency']) else None,  # Convert to string
        }

        # Check if any data has changed
        has_changes = False
        for key, value in podcast_data.items():
            current_value = getattr(existing, key)
            if current_value != value:
                print(f"Found change in {key}: {current_value} -> {value}")
                has_changes = True
                setattr(existing, key, value)

        if has_changes:
            existing.updated_at = datetime.now(pytz.UTC)
            updated_count += 1
            print(f"Updated podcast: {podcast_data['name']}")

    # Commit all changes
    print("Committing changes...")
    await db.commit()
    print(f"\nUpdate completed! Modified {updated_count} podcasts.")
    return updated_count

# Run the update through the notebook's run_async helper, like every other
# cell, instead of repeating the event-loop handling inline.
print("Starting update process...")
result = run_async(run_db_operation(update_podcasts))  # number of podcasts modified
print("Update process finished!")

Starting update process...
Starting podcast updates...
Processing All-In...
Found change in publisher: None -> All-In Podcast, LLC
Updated podcast: All-In
Processing Call Her Daddy...
Found change in publisher: None -> Alex Cooper
Updated podcast: Call Her Daddy
Processing Conan O'Brien Needs a Friend...
Found change in publisher: None -> Team Coco & Earwolf
Updated podcast: Conan O'Brien Needs a Friend
Processing The Daily...
Found change in publisher: None -> The New York Times
Updated podcast: The Daily
Processing The Ezra Klein Show...
Found change in publisher: None -> New York Times Opinion
Updated podcast: The Ezra Klein Show
Processing Fresh Air...
Found change in publisher: None -> NPR
Updated podcast: Fresh Air
Processing Hard Fork...
Found change in publisher: None -> The New York Times
Updated podcast: Hard Fork
Processing Huberman Lab...
Found change in publisher: None -> Scicomm Media
Updated podcast: Huberman Lab
Processing The Joe Rogan Experience...
Found change in pub

## Episode Operations

### Get Recent Episodes

In [None]:
# Fetch the five most recent episodes and print a short summary of each
recent = run_async(run_db_operation(lambda db: get_recent_episodes(db, limit=5)))

# Longest summary prefix to print before truncating
SUMMARY_PREVIEW_CHARS = 100

for ep in recent:
    print(f"\nEpisode: {ep.title}")
    print(f"Podcast: {ep.podcast.name}")
    print(f"Published: {ep.publish_date}")
    if ep.summary:
        # Append the ellipsis only when the summary was actually cut short
        preview = ep.summary[:SUMMARY_PREVIEW_CHARS]
        suffix = "..." if len(ep.summary) > SUMMARY_PREVIEW_CHARS else ""
        print(f"Summary: {preview}{suffix}")

### Get Episodes for a Specific Podcast

In [None]:
PODCAST_ID = "your-podcast-id-here"  # placeholder; replace with your podcast ID

def _episodes_of_podcast(db):
    """Fetch the episodes of the podcast named by PODCAST_ID (read at call time)."""
    return get_podcast_episodes(db, PODCAST_ID)

episodes = run_async(run_db_operation(_episodes_of_podcast))

# Three lines per episode: title, publish date and RSS GUID
for ep in episodes:
    print(f"\nEpisode: {ep.title}")
    print(f"Published: {ep.publish_date}")
    print(f"GUID: {ep.rss_guid}")

### Create New Episode

In [None]:
new_episode_data = {
    "podcast_id": "your-podcast-id-here",  # Replace with actual ID
    "rss_guid": "unique-guid-here",
    "title": "Test Episode",
    "publish_date": datetime.now(timezone.utc),
    "summary": "A test episode"
}

async def create_new_episode(db, data):
    """Create an episode from ``data``, commit the change and return the episode."""
    episode = await create_episode(db, data)
    await db.commit()
    return episode

async def get_or_create_episode(db, data):
    """Return ``(episode, created)``, creating only when the GUID is unknown.

    Re-running this cell would otherwise try to create the same episode a
    second time. The lookup uses ``get_episode_by_guid``, which takes only the
    GUID - presumably GUIDs are unique across podcasts; verify against the
    schema.
    """
    existing = await get_episode_by_guid(db, data["rss_guid"])
    if existing:
        return existing, False
    return await create_new_episode(db, data), True

new_episode, was_created = run_async(
    run_db_operation(lambda db: get_or_create_episode(db, new_episode_data))
)
status = "Created" if was_created else "Already exists"
print(f"{status}: {new_episode.title} (ID: {new_episode.id})")

### Find Episode by GUID

In [None]:
EPISODE_GUID = "your-guid-here"  # placeholder; replace with your episode GUID

def _episode_for_guid(db):
    """Look up the episode stored under EPISODE_GUID (read when the lookup runs)."""
    return get_episode_by_guid(db, EPISODE_GUID)

episode = run_async(run_db_operation(_episode_for_guid))
# Handle the miss first; otherwise print title, owning podcast ID and publish date
if not episode:
    print("Episode not found")
else:
    print(f"Found: {episode.title}")
    print(f"Podcast ID: {episode.podcast_id}")
    print(f"Published: {episode.publish_date}")