In [3]:
# Michael Hernandez — CIS 9650 Project (Marvel vs DC)
# BoxOffice data only, save one clean CSV

import time, math, requests, pandas as pd
from config import omdb_api_key as API_KEY

# OMDb endpoint; every lookup is a GET against this URL with the search
# criteria and API key supplied as query parameters.
BASE_URL = "https://www.omdbapi.com/"

# === 1) Two full lists ===
# DC-branded films to look up. Each entry is written as "Title (Year)"; the
# trailing parenthesised year is split off later and used as a lookup hint.
# NOTE(review): if a year hint does not match what OMDb has, the lookup is
# retried without the year and may resolve to a different film with the same
# title — worth double-checking entries such as "Batman & Robin (1966)".
dc_list = [
    "The Dark Knight (2008)", "Wonder Woman (2017)", "Teen Titans Go! To the Movies (2018)", "Shazam! (2019)",
    "The Suicide Squad (2021)", "The LEGO Batman Movie (2017)", "A History of Violence (2005)", "Superman II (1980)",
    "The Dark Knight Rises (2012)", "Superman: The Movie (1978)", "The Batman (2022)", "Batman Begins (2005)",
    "Batman: Mask of the Phantasm (1993)", "Superman (2025)", "Road to Perdition (2002)", "Batman Returns (1992)",
    "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn) (2020)", "Blue Beetle (2023)", "Batman (1989)",
    "Superman Returns (2006)", "V for Vendetta (2005)", "Red (2010)", "Batman: Return of the Caped Crusaders (2016)",
    "Batman (1966)", "DC League of Super-Pets (2022)", "Zack Snyder’s Justice League (2021)", "Joker (2019)",
    "Aquaman (2018)", "Watchmen (2009)", "The Flash (2023)", "Swamp Thing (1982)", "Wonder Woman 1984 (2020)",
    "Man of Steel (2013)", "The Return of Swamp Thing (1989)", "Batman and Harley Quinn (2017)",
    "Shazam! Fury of the Gods (2023)", "The Losers (2010)", "Constantine (2005)", "Red 2 (2013)", "Batman Forever (1995)",
    "Justice League (2017)", "Black Adam (2022)", "Batman: The Killing Joke (2016)", "Aquaman and the Lost Kingdom (2023)",
    "Joker: Folie à Deux (2024)", "Superman III (1983)", "Batman v Superman: Dawn of Justice (2016)", "Suicide Squad (2016)",
    "Green Lantern (2011)", "The Kitchen (2019)", "Catwoman (2004)", "Jonah Hex (2010)", "Steel (1997)",
    "Superman IV: The Quest for Peace (1987)", "Batman & Robin (1997)", "Batman & Robin (1966)",
    "Superman and the Mole Men (1951)"
]

# Marvel-branded films to look up, in the same "Title (Year)" format; the
# trailing parenthesised year is split off later and used as a lookup hint.
marvel_list = [
    "Howard the Duck (1986)", "Daredevil (2003)", "Elektra (2005)", "The Punisher (2004)", "Punisher: War Zone (2008)",
    "Ghost Rider (2007)", "Ghost Rider: Spirit of Vengeance (2012)", "Blade (1998)", "Blade II (2002)", "Blade: Trinity (2004)",
    "X-Men (2000)", "X2: X-Men United (2003)", "X-Men: The Last Stand (2006)", "X-Men Origins: Wolverine (2009)",
    "X-Men: First Class (2011)", "The Wolverine (2013)", "X-Men: Days of Future Past (2014)", "Deadpool (2016)",
    "X-Men: Apocalypse (2016)", "Logan (2017)", "Deadpool 2 (2018)", "X-Men: Dark Phoenix (2019)", "The New Mutants (2020)",
    "Spider-Man (2002)", "Spider-Man 2 (2004)", "Spider-Man 3 (2007)", "The Amazing Spider-Man (2012)",
    "The Amazing Spider-Man 2 (2014)", "Venom (2018)", "Venom: Let There Be Carnage (2021)", "Morbius (2022)",
    "Madame Web (2024)", "Venom: The Last Dance (2024)", "Kraven the Hunter (2024)", "Iron Man (2008)",
    "The Incredible Hulk (2008)", "Iron Man 2 (2010)", "Thor (2011)", "Captain America: The First Avenger (2011)",
    "The Avengers (2012)", "Iron Man 3 (2013)", "Thor: The Dark World (2013)", "Captain America: The Winter Soldier (2014)",
    "Guardians of the Galaxy (2014)", "Avengers: Age of Ultron (2015)", "Ant-Man (2015)", "Captain America: Civil War (2016)",
    "Doctor Strange (2016)", "Guardians of the Galaxy Vol. 2 (2017)", "Spider-Man: Homecoming (2017)",
    "Thor: Ragnarok (2017)", "Black Panther (2018)", "Avengers: Infinity War (2018)", "Ant-Man and the Wasp (2018)",
    "Captain Marvel (2019)", "Avengers: Endgame (2019)", "Spider-Man: Far From Home (2019)", "Black Widow (2021)",
    "Shang-Chi and the Legend of the Ten Rings (2021)", "Eternals (2021)", "Spider-Man: No Way Home (2021)",
    "Doctor Strange in the Multiverse of Madness (2022)", "Thor: Love and Thunder (2022)", "Black Panther: Wakanda Forever (2022)",
    "Ant-Man and the Wasp: Quantumania (2023)", "Guardians of the Galaxy Vol. 3 (2023)", "The Marvels (2023)",
    "Deadpool & Wolverine (2024)", "Captain America: Brave New World (2025)", "Thunderbolts (2025)"
]

# === 2) Helper functions ===
def split_title_year(s):
    """Split a "Title (Year)" string into its title and year parts.

    The suffix is recognised only when the string ends with ")" and an
    opening "(" occurs within its last seven characters, so longer
    parentheticals are left as part of the title.

    Returns:
        A ``(title, year)`` tuple. ``year`` is the raw text between the
        final parentheses (not validated as digits), or ``None`` when no
        short trailing parenthetical is present, in which case ``title``
        is the input unchanged.
    """
    tail = s[-7:]
    if not s.endswith(")") or "(" not in tail:
        return s, None

    open_idx = s.rfind("(")
    title = s[:open_idx].strip()
    year = s[open_idx + 1:-1]
    return title, year

def parse_money(s):
    """Convert a dollar string such as "$534,987,076" to an int.

    Args:
        s: The money text. Dollar signs, thousands separators and
            surrounding whitespace are ignored.

    Returns:
        The amount as an ``int``, or ``math.nan`` when ``s`` is empty,
        ``None``, the literal "N/A", not a string, or not a whole number.
    """
    if not s or s == "N/A":
        return math.nan
    try:
        return int(s.replace("$", "").replace(",", "").strip())
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: s is not a string; ValueError: not an
        # integer. Anything else (e.g. KeyboardInterrupt) must propagate.
        return math.nan

def omdb_fetch(title, year_hint=None):
    """Look up a single movie on OMDb by exact title.

    Makes at most two requests. The first includes ``year_hint`` (when it
    is all digits) as the ``y`` parameter; the second drops the year so a
    title whose listed year differs from OMDb's can still be found.

    Args:
        title: Movie title sent as the ``t`` query parameter.
        year_hint: Optional release year (string or int) used to
            disambiguate same-titled films on the first attempt.

    Returns:
        The decoded JSON dict when OMDb answers with ``Response == "True"``,
        otherwise ``None``. Network errors, timeouts, non-200 statuses and
        bodies that are not JSON all count as a failed attempt rather than
        raising, so one bad request cannot abort a long batch run.
    """
    params = {"t": title, "type": "movie", "apikey": API_KEY}
    if year_hint and str(year_hint).isdigit():
        params["y"] = str(year_hint)

    for _ in range(2):
        try:
            r = requests.get(BASE_URL, params=params, timeout=20)
            data = r.json() if r.status_code == 200 else None
        except (requests.RequestException, ValueError):
            # Connection failure, timeout, or a body that is not valid JSON.
            data = None

        if isinstance(data, dict) and data.get("Response") == "True":
            return data

        # Retry without the year, pausing briefly to stay polite to the API.
        params.pop("y", None)
        time.sleep(0.2)
    return None

# === 3) Build combined dataframe ===
# One row per listed film: its brand label plus the title and year hint
# split out of the "Title (Year)" entry.
records = []
for brand, movielist in (("DC", dc_list), ("Marvel", marvel_list)):
    records.extend(
        {"brand": brand, "title": title, "year_hint": year}
        for title, year in map(split_title_year, movielist)
    )

df = pd.DataFrame(records)

# === 4) Fetch only key info from OMDb ===
out = []
# imdbIDs already recorded. omdb_fetch retries without the year when the
# hinted year finds nothing, so two list entries with the same title can
# resolve to the same film; keeping both would double-count its box office.
seen_ids = set()
print("Fetching movie data...")

for i, r in df.iterrows():
    data = omdb_fetch(r["title"], r["year_hint"])
    if not data or data.get("BoxOffice") in [None, "N/A"]:
        continue  # skip if no box office data

    imdb_id = data.get("imdbID")
    if imdb_id is not None:
        if imdb_id in seen_ids:
            continue  # same film already recorded from an earlier entry
        seen_ids.add(imdb_id)

    rec = {
        "brand": r["brand"],
        # Title/Year come from OMDb's record, not from the input list.
        "Title": data.get("Title"),
        "Year": data.get("Year"),
        "BoxOffice": parse_money(data.get("BoxOffice")),
    }
    out.append(rec)
    print(f"[{i+1}/{len(df)}] {r['brand']} – {r['title']} → {data.get('BoxOffice')}")
    time.sleep(0.1)  # brief pause between successful lookups

# Name the columns explicitly so the frame has the expected schema even when
# no movie was fetched; an empty list would otherwise yield a column-less
# frame and the "Year" lookup below would raise KeyError.
movies = pd.DataFrame(out, columns=["brand", "Title", "Year", "BoxOffice"])

# === 5) Save one clean numeric CSV ===
movies["Year"] = pd.to_numeric(movies["Year"], errors="coerce")  # unparseable years become NaN
movies = movies.dropna(subset=["BoxOffice"])  # drop rows whose box office failed to parse
movies = movies[["brand", "Title", "Year", "BoxOffice"]]  # keep only clean columns
movies.to_csv("marvel_dc_boxoffice.csv", index=False)

print(f"\n✅ Saved clean file 'marvel_dc_boxoffice.csv' with {len(movies)} movies.")



Fetching movie data...
[1/127] DC – The Dark Knight → $534,987,076
[2/127] DC – Wonder Woman → $412,845,172
[3/127] DC – Teen Titans Go! To the Movies → $29,790,236
[4/127] DC – Shazam! → $140,480,049
[5/127] DC – The Suicide Squad → $55,817,425
[6/127] DC – The LEGO Batman Movie → $175,936,671
[7/127] DC – A History of Violence → $31,504,633
[8/127] DC – Superman II → $108,185,706
[9/127] DC – The Dark Knight Rises → $448,149,584
[11/127] DC – The Batman → $369,345,583
[12/127] DC – Batman Begins → $206,863,479
[13/127] DC – Batman: Mask of the Phantasm → $5,635,204
[14/127] DC – Superman → $353,980,047
[15/127] DC – Road to Perdition → $104,454,762
[16/127] DC – Batman Returns → $162,924,631
[17/127] DC – Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn) → $84,172,791
[18/127] DC – Blue Beetle → $72,488,072
[19/127] DC – Batman → $251,409,241
[20/127] DC – Superman Returns → $200,081,192
[21/127] DC – V for Vendetta → $70,511,035
[22/127] DC – Red → $90,380,162
[25