In [3]:
# csv_to_sqlite.py
import sqlite3
import csv
import os
from datetime import datetime

# Configuration
CSV_FILE = "../data/netflix_titles.csv"
DB_FILE = "../data/netflix.db"

def create_db():
    """Cr√©e la base de donn√©es et la table avec le sch√©ma adapt√©"""
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()

    cursor.execute("""
    CREATE TABLE IF NOT EXISTS shows (
        show_id TEXT PRIMARY KEY,
        type TEXT NOT NULL,
        title TEXT NOT NULL,
        director TEXT,
        cast TEXT,
        country TEXT,
        date_added TEXT,
        release_year INTEGER,
        rating TEXT,
        duration TEXT,
        listed_in TEXT,
        description TEXT,
        added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
    """)

    # Cr√©er un index pour les requ√™tes fr√©quentes
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_type ON shows(type)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_country ON shows(country)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_year ON shows(release_year)")

    conn.commit()
    conn.close()

def clean_data(value):
    """Nettoie les valeurs du CSV (remplace les vides par NULL)"""
    return None if value == "" else value

def import_csv():
    """Import les donn√©es du CSV en g√©rant les valeurs vides"""
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()

    with open(CSV_FILE, mode='r', encoding='utf-8') as csv_file:
        csv_reader = csv.DictReader(csv_file)

        for i, row in enumerate(csv_reader):
            # Nettoyer les donn√©es
            clean_row = {k: clean_data(v) for k, v in row.items()}

            # Convertir la date au format ISO
            try:
                date_obj = datetime.strptime(clean_row['date_added'], "%B %d, %Y")
                clean_row['date_added'] = date_obj.strftime("%Y-%m-%d")
            except:
                pass  # Garder la date originale si le format est invalide

            # Ins√©rer dans la base
            cursor.execute("""
            INSERT OR IGNORE INTO shows (
                show_id, type, title, director, cast, country,
                date_added, release_year, rating, duration,
                listed_in, description
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                clean_row['show_id'],
                clean_row['type'],
                clean_row['title'],
                clean_row['director'],
                clean_row['cast'],
                clean_row['country'],
                clean_row['date_added'],
                int(clean_row['release_year']) if clean_row['release_year'] else None,
                clean_row['rating'],
                clean_row['duration'],
                clean_row['listed_in'],
                clean_row['description']
            ))

            if i % 1000 == 0:
                conn.commit()
                print(f"Import√© {i} lignes...")

    conn.commit()
    conn.close()
    print(f"‚úÖ Import termin√©: {i+1} lignes import√©es dans {DB_FILE}")

def verify_data():
    """V√©rifie les donn√©es import√©es"""
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()

    # Compter les entr√©es
    cursor.execute("SELECT COUNT(*) FROM shows")
    count = cursor.fetchone()[0]
    print(f"\nüìä Statistiques:")
    print(f"- Total: {count} films/s√©ries")

    # R√©partition par type
    cursor.execute("SELECT type, COUNT(*) FROM shows GROUP BY type")
    for type_, count in cursor:
        print(f"- {type_}: {count}")

    # Ann√©es de sortie
    cursor.execute("SELECT MIN(release_year), MAX(release_year) FROM shows")
    min_year, max_year = cursor.fetchone()
    print(f"- P√©riode: {min_year} √† {max_year}")

    # Pays les plus repr√©sent√©s
    cursor.execute("SELECT country, COUNT(*) FROM shows GROUP BY country ORDER BY COUNT(*) DESC LIMIT 3")
    print("\nüåç Top 3 pays:")
    for country, count in cursor:
        print(f"- {country}: {count}")

    conn.close()

if __name__ == "__main__":
    if not os.path.exists(CSV_FILE):
        print(f"‚ùå Fichier {CSV_FILE} introuvable!")
    else:
        print("üîß Cr√©ation de la base de donn√©es...")
        create_db()
        print("üì§ Import des donn√©es...")
        import_csv()
        verify_data()


üîß Cr√©ation de la base de donn√©es...
üì§ Import des donn√©es...
Import√© 0 lignes...
Import√© 1000 lignes...
Import√© 2000 lignes...
Import√© 3000 lignes...
Import√© 4000 lignes...
Import√© 5000 lignes...
Import√© 6000 lignes...
Import√© 7000 lignes...
Import√© 8000 lignes...
‚úÖ Import termin√©: 8807 lignes import√©es dans ../data/netflix.db

üìä Statistiques:
- Total: 8807 films/s√©ries
- Movie: 6131
- TV Show: 2676
- P√©riode: 1925 √† 2021

üåç Top 3 pays:
- United States: 2818
- India: 972
- None: 831


In [2]:
import json
from pathlib import Path

# ---- 1. The whole documentation as a Python dict (valid JSON) ----
omdb_doc = {
    "description": (
        "This documentation outlines the structure and specifications of the OMDB API "
        "(Open Movie Database). It is organized into two main dictionaries:\n"
        '- "arguments": Defines all input parameters accepted by the API, split into two query modes:\n'
        "    ‚Ä¢ by_id_or_title: Search by IMDb ID or exact title.\n"
        "    ‚Ä¢ by_search: Keyword-based search (fuzzy matching).\n"
        "  Each parameter includes: required status, data type, valid values, default value (if any), "
        "clear description, and example.\n"
        "  Note: In by_id_or_title mode, **at least one of 'i' or 't' is required**.\n"
        '- "responses": Describes the structure of the API\'s returned data, with data type '
        "and example for each field.\n"
        "These dictionaries enable building valid OMDB API requests and automatically parsing responses.\n\n"
        "**Important**: An API key (`apikey`) is **required** for all requests."
    ),
    "base_url": "http://www.omdbapi.com/",
    "arguments": {
        "by_id_or_title": {
            "apikey": {
                "required": True,
                "type": "str",
                "description": "API key required to access the service. Get one at http://www.omdbapi.com/apikey.aspx",
                "example": "apikey=12345678"
            },
            "i": {
                "required": False,
                "type": "str",
                "description": "IMDb ID of the movie, series, or episode (format: 'tt' followed by 7 or 8 digits). *At least 'i' or 't' is required.*",
                "example": "i=tt1285016"
            },
            "t": {
                "required": False,
                "type": "str",
                "description": "Exact title of the movie, series, or episode. *At least 'i' or 't' is required.*",
                "example": "t=Inception"
            },
            "type": {
                "required": False,
                "type": "str",
                "valid_options": ["movie", "series", "episode"],
                "default": None,
                "description": "Type of result to return (movie, series, or episode).",
                "example": "type=movie"
            },
            "y": {
                "required": False,
                "type": "str",
                "description": "Year of release (4-digit year).",
                "example": "y=2010"
            },
            "plot": {
                "required": False,
                "type": "str",
                "valid_options": ["short", "full"],
                "default": "short",
                "description": "Return a short or full plot summary.",
                "example": "plot=full"
            },
            "r": {
                "required": False,
                "type": "str",
                "valid_options": ["json", "xml"],
                "default": "json",
                "description": "Response data format.",
                "example": "r=json"
            },
            "callback": {
                "required": False,
                "type": "str",
                "description": "Callback function name for JSONP support.",
                "example": "callback=myFunction"
            },
            "v": {
                "required": False,
                "type": "str",
                "default": "1",
                "description": "API version (reserved for future use).",
                "example": "v=1"
            }
        },
        "by_search": {
            "apikey": {
                "required": True,
                "type": "str",
                "description": "API key required to access the service. Get one at http://www.omdbapi.com/apikey.aspx",
                "example": "apikey=12345678"
            },
            "s": {
                "required": True,
                "type": "str",
                "description": "Movie/series title to search for (keyword-based search).",
                "example": "s=Joker"
            },
            "type": {
                "required": False,
                "type": "str",
                "valid_options": ["movie", "series", "episode"],
                "default": None,
                "description": "Type of result to return (movie, series, or episode).",
                "example": "type=movie"
            },
            "y": {
                "required": False,
                "type": "str",
                "description": "Year of release (4-digit year).",
                "example": "y=2019"
            },
            "r": {
                "required": False,
                "type": "str",
                "valid_options": ["json", "xml"],
                "default": "json",
                "description": "Response data format.",
                "example": "r=json"
            },
            "page": {
                "required": False,
                "type": "int",
                "valid_options": list(range(1, 101)),
                "default": 1,
                "description": "Page number to return (1‚Äì100). Maximum 100 results per page.",
                "example": "page=2"
            },
            "callback": {
                "required": False,
                "type": "str",
                "description": "Callback function name for JSONP support.",
                "example": "callback=myFunction"
            },
            "v": {
                "required": False,
                "type": "str",
                "default": "1",
                "description": "API version (reserved for future use).",
                "example": "v=1"
            }
        }
    },
    "responses": {
        "Title": {"type": "str", "description": "Title of the movie or series.", "example": "Inception"},
        "Year": {"type": "str", "description": "Year of release (4-digit string).", "example": "2010"},
        "Rated": {"type": "str", "description": "MPAA rating (e.g., 'PG-13', 'R') or 'N/A'.", "example": "PG-13"},
        "Released": {"type": "str", "description": "Full release date (formatted as 'DD Mon YYYY').", "example": "16 Jul 2010"},
        "Runtime": {"type": "str", "description": "Runtime in minutes (with ' min' suffix).", "example": "148 min"},
        "Genre": {"type": "str", "description": "Comma-separated list of genres.", "example": "Action, Sci-Fi, Thriller"},
        "Director": {"type": "str", "description": "Comma-separated list of directors. 'N/A' if unknown.", "example": "Christopher Nolan"},
        "Writer": {"type": "str", "description": "Comma-separated list of writers. 'N/A' if unknown.", "example": "Christopher Nolan"},
        "Actors": {"type": "str", "description": "Comma-separated list of lead actors. 'N/A' if unknown.", "example": "Leonardo DiCaprio, Ken Watanabe, Joseph Gordon-Levitt"},
        "Plot": {"type": "str", "description": "Plot summary. Length depends on `plot=short` (default) or `plot=full`.", "example": "A thief who steals corporate secrets through dream-sharing technology..."},
        "Language": {"type": "str", "description": "Comma-separated list of languages.", "example": "English, Japanese, French"},
        "Country": {"type": "str", "description": "Comma-separated list of countries of origin.", "example": "USA, UK"},
        "Awards": {"type": "str", "description": "Awards won/nominated. 'N/A' if none.", "example": "Won 4 Oscars. Another 152 wins & 204 nominations."},
        "Poster": {"type": "str", "description": "URL to the poster image. 'N/A' if not available.", "example": "https://m.media-amazon.com/images/M/MV5BMjAxMzY3NjcxNF5BMl5BanBnXkFtZTcwNTI5OTM0Mw@@._V1_SX300.jpg"},
        "Ratings": {"type": "list[dict]", "description": "List of ratings from various sources. Each dict has 'Source' and 'Value'.", "example": [
            {"Source": "Internet Movie Database", "Value": "8.8/10"},
            {"Source": "Rotten Tomatoes", "Value": "87%"},
            {"Source": "Metacritic", "Value": "74/100"}
        ]},
        "Metascore": {"type": "str", "description": "Metacritic score (0‚Äì100). 'N/A' if not available.", "example": "74"},
        "imdbRating": {"type": "str", "description": "IMDb rating out of 10.", "example": "8.8"},
        "imdbVotes": {"type": "str", "description": "Number of votes on IMDb (with commas).", "example": "2,345,678"},
        "imdbID": {"type": "str", "description": "Unique IMDb ID (starts with 'tt').", "example": "tt1375666"},
        "Type": {"type": "str", "description": "Content type: 'movie', 'series', or 'episode'.", "example": "movie"},
        "DVD": {"type": "str", "description": "DVD release date ('DD Mon YYYY') or 'N/A'.", "example": "07 Dec 2010"},
        "BoxOffice": {"type": "str", "description": "Box office earnings (formatted with '$'). 'N/A' if unknown.", "example": "$292,576,195"},
        "Production": {"type": "str", "description": "Production company. 'N/A' if unknown.", "example": "Warner Bros., Legendary Entertainment"},
        "Website": {"type": "str", "description": "Official website URL. 'N/A' if none.", "example": "http://inceptionmovie.warnerbros.com/" },
        "Response": {"type": "str", "description": "Indicates if request was successful: 'True' or 'False'.", "example": "True"}
    }
}

# ---- 2. Write a *real* JSON file (pretty-printed) ----
path = Path("omdb_api_doc.json")
path.write_text(json.dumps(omdb_doc, indent=4), encoding="utf-8")
print(f"File written to: {path.resolve()}")

# ---- 3. Load it back (this will now work) ----
with path.open("r", encoding="utf-8") as f:
    loaded = json.load(f)

print("\nLoaded successfully!")
print("Title example:", loaded["responses"]["Title"]["example"])

File written to: C:\Users\Vincent\GitHub\Vincent-20-100\Agentic_Systems_Project_Vlamy\code\omdb_api_doc.json

Loaded successfully!
Title example: Inception
