In [103]:
import asyncio
from playwright.async_api import async_playwright

async def get_event_links(url, scroll_pause=1.0, max_scrolls=None):
    """Collect the href of every ``/events/`` link on an infinitely scrolling page.

    The page is scrolled to the bottom repeatedly until its height stops
    growing, then all matching anchors are read from the DOM.

    Args:
        url: Address of the listing page to open.
        scroll_pause: Seconds to wait after each scroll so that lazily loaded
            content can be appended (default 1.0, the original fixed delay).
        max_scrolls: Optional upper bound on the number of scroll steps.
            ``None`` (default) keeps scrolling until the height is stable.

    Returns:
        A list of href strings in document order (duplicates are kept).
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)

            # Scroll down to load all dynamic content
            previous_height = None
            scrolls_done = 0
            while max_scrolls is None or scrolls_done < max_scrolls:
                current_height = await page.evaluate("document.body.scrollHeight")
                if previous_height == current_height:
                    # Height unchanged since the last scroll: nothing new was loaded.
                    break
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                await asyncio.sleep(scroll_pause)
                previous_height = current_height
                scrolls_done += 1

            # Extract links containing '/events/'
            links = await page.query_selector_all("a[href*='/events/']")
            return [await link.get_attribute("href") for link in links]
        finally:
            # Close the browser even when navigation or scrolling raised.
            await browser.close()

# Listing page whose event links are collected.
url = "https://basementny.net/past"
# Top-level `await` is accepted in IPython/Jupyter cells; it is a SyntaxError in a plain script.
events = await get_event_links(url)
print(events)

# Or, if you're running as a standalone script (keep the return value,
# otherwise the collected links are thrown away):
# events = asyncio.run(get_event_links(url))


['/events/63e285d2-4a4a-4133-8ae0-6a57f3dc911a', '/events/32006196-222a-40e9-be76-202d55f1f96c', '/events/f1778147-8f5e-4106-afac-fa787cef899c', '/events/808cc6ab-063a-453c-9f9d-8cfecb9752dc', '/events/e7b6ebe1-db40-451a-a23a-408809f7484d', '/events/219ea7f2-37af-49b4-af2c-abe78e540422', '/events/8b849243-d119-4e7a-85f7-8caec4a269ee', '/events/d1147ab9-5f08-4695-9ee6-9740085c2eb5', '/events/828a55dd-6c5c-4c9d-b02e-7b442ce6926a', '/events/a0dbdfc5-c4ed-4e04-a05e-c5ba728f1f91', '/events/40ac783a-34e7-4d3a-899b-03a6b0cdce3f', '/events/764eb181-5f70-4f60-86f9-12a95ad32877', '/events/0d837954-fdcc-4248-bae4-5cd2ddfd4a98', '/events/91eabd5a-e619-48f3-8826-15524e30af05', '/events/37114c04-60ae-4c89-9c4e-9596cd38f906', '/events/02bfc52d-6d5d-4e54-9da8-7c7820de00e9', '/events/02a5bfc0-cfff-4e4c-af4f-d06addbe5386', '/events/a5a2d3db-8b82-4a60-b4eb-336fb3850a6a', '/events/08286661-eafd-43c8-8bd8-17cbee3f22ed', '/events/fd18978d-921b-4794-96a0-5f6162ffb2a2', '/events/5cb44f6f-fdb3-488b-86ee-6a93ba

In [52]:
import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
from datetime import datetime
from dateutil.parser import parse as parse_date
import nest_asyncio
nest_asyncio.apply()

def _parse_event_detail_html(html):
    """Extract date, title and per-stage line-ups from rendered event-page HTML.

    Raises:
        ValueError: If the date or the title element is missing.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # 1. Extract event detail date
    date_div = soup.find("div", class_="event-detail__date")
    if date_div is None:
        raise ValueError("Could not find the event detail date element.")
    # Title-case the text before handing it to dateutil, which copes with several formats.
    date_text = date_div.get_text(strip=True).title()
    event_date = parse_date(date_text)

    # 2. Extract event title: the first direct <div> child of the <h1>, if any
    title_h1 = soup.find("h1", class_="event-detail__title")
    if title_h1 is None:
        raise ValueError("Could not find the event detail title element.")
    event_title_div = title_h1.find("div", recursive=False)
    event_title = event_title_div.get_text(strip=True) if event_title_div else ""

    # 3. Extract event detail music information, one {stage: [djs]} dict per stage
    event_detail_music = []
    stage_divs = title_h1.find_all("div", class_=lambda c: c and "event-detail__title-stages" in c)
    for stage in stage_divs:
        # Prefer the <p> wrapper when present, otherwise search the stage div itself.
        container = stage.find("p") or stage
        headline_div = container.find("div", class_="event-detail__headline")
        if headline_div is None:
            continue
        stage_name = headline_div.get_text(strip=True)

        dj_list = []
        for dj in container.find_all("div"):
            # Headline divs carry the stage name, not a DJ.
            if dj is headline_div or "event-detail__headline" in dj.get("class", []):
                continue
            text = dj.get_text(strip=True)
            # Skip empty divs and the literal "null" placeholder.
            if not text or text.lower() == "null":
                continue
            dj_list.append(text)

        if stage_name and dj_list:
            event_detail_music.append({stage_name: dj_list})

    return {
        "event_detail_date": event_date,
        "event_detail_title": event_title,
        "event_detail_music": event_detail_music
    }


async def get_event_detail(url, timeout=9000):
    """Render an event page in headless Chromium and parse its details.

    Args:
        url: Address of the event detail page.
        timeout: Milliseconds to wait for the date element to appear
            (default 9000, the original fixed value).

    Returns:
        dict with keys ``event_detail_date`` (datetime),
        ``event_detail_title`` (str) and ``event_detail_music``
        (list of single-key ``{stage_name: [dj, ...]}`` dicts).

    Raises:
        ValueError: If the date or title element is absent from the HTML.
        Playwright errors (e.g. a timeout) propagate unchanged.
    """
    # Launch browser and navigate to the URL
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)
            # Wait until the event date element is present
            await page.wait_for_selector("div.event-detail__date", timeout=timeout)
            html = await page.content()
        finally:
            # Close the browser even when the page failed to load or timed out.
            await browser.close()

    # Parse the rendered HTML with BeautifulSoup
    return _parse_event_detail_html(html)

# Example usage:
# The guard is true inside a notebook kernel as well, so this runs on cell execution.
if __name__ == "__main__":
    url = "https://basementny.net/events/3dd1e882-fb7d-4307-b0ae-d93bafaee185"
    # asyncio.run() from within an already running loop relies on the nest_asyncio.apply() call above.
    event_data = asyncio.run(get_event_detail(url))
    print(event_data)

{'event_detail_date': datetime.datetime(2022, 3, 25, 22, 30), 'event_detail_title': 'FIST', 'event_detail_music': [{'Basement': ['BLUE HOUR', 'TYGAPAW', 'MEMPHY', 'DJ CLONE', 'WORD OF COMMAND']}]}


In [44]:
# Accumulator for scraped event records; re-running this cell discards everything collected so far.
event_data_list = []

In [46]:
# Sanity check: number of event records accumulated so far.
len(event_data_list)

150

In [45]:
# Scrape the first 150 event pages one at a time and append each record to event_data_list.
# Every call launches its own headless browser; an exception aborts the loop,
# leaving the records appended so far in event_data_list.
for x in events[:150]:
    # hrefs are site-relative, so prefix the host.
    url = "https://basementny.net" + x
    event_data = asyncio.run(get_event_detail(url))
    print(event_data)
    event_data_list.append(event_data)

{'event_detail_date': datetime.datetime(2025, 3, 8, 22, 30), 'event_detail_title': 'March 8', 'event_detail_music': [{'Basement': ['DJ BONE', 'MIKE SERVITO', 'ROSE KOURTS']}, {'Studio': ['FKA.M4A', 'JASON KENDIG', 'COSMO']}]}
{'event_detail_date': datetime.datetime(2025, 3, 7, 22, 30), 'event_detail_title': 'BASSIANI NIGHT', 'event_detail_music': [{'Basement': ['NEWA B2B NDRX', 'KANCHELI B2B NEBBIERI']}, {'Studio': ['GABRIELLE KWARTENG', '98DOTS', 'DJ SURGE']}]}
{'event_detail_date': datetime.datetime(2025, 3, 1, 22, 30), 'event_detail_title': 'ALTÆR 5 Year Anniversary', 'event_detail_music': [{'Basement': ['LADY STARLIGHT LIVE', 'STERAC', 'AUSPEX']}, {'Studio': ['DAVID ELIMELECH', 'LOREN', "S'AINT PANIC"]}]}
{'event_detail_date': datetime.datetime(2025, 2, 28, 22, 30), 'event_detail_title': 'X-TRA.SERVICES', 'event_detail_music': [{'Basement': ['STEF MENDESIDIS LIVE', 'YANT', 'ÆON']}, {'Studio': ['HAAI', 'K WATA', 'LYDO']}]}
{'event_detail_date': datetime.datetime(2025, 2, 22, 22, 30)

In [54]:
# Inspect the event path at index 214, the offset from which the next scrape loop resumes.
events[214]

'/events/a3b33be9-aeba-438a-a74d-3e3b37d39a4c'

In [55]:
# Resume scraping from index 214 to the end of the link list, appending to the same accumulator.
# NOTE(review): indices 150-213 are not covered by any loop visible in this notebook —
# confirm they were scraped elsewhere before relying on event_data_list being complete.
for x in events[214:]:
    # hrefs are site-relative, so prefix the host.
    url = "https://basementny.net" + x
    event_data = asyncio.run(get_event_detail(url))
    # print(url)
    print(event_data)
    event_data_list.append(event_data)

{'event_detail_date': datetime.datetime(2022, 9, 16, 22, 30), 'event_detail_title': 'September 16', 'event_detail_music': [{'Basement': ['WATA IGARASHI', 'ALEJANDRO FRANCO', 'ANTENES']}, {'Studio': ['THE CARRY NATION', 'DEE DIGGS', 'KILOPATRAH JONES']}]}
{'event_detail_date': datetime.datetime(2022, 9, 10, 0, 0), 'event_detail_title': 'ALTÆR', 'event_detail_music': [{'Basement': ['REGAL', 'MADWOMAN', 'AUSPEX']}, {'Studio': ['EFDEMIN', 'LAUREN FLAX']}]}
{'event_detail_date': datetime.datetime(2022, 9, 10, 22, 30), 'event_detail_title': 'FIST', 'event_detail_music': [{'Basement': ['RANDOMER', 'PHILIPPA PACHO', 'WORD OF COMMAND', 'DJ CLONE']}, {'Studio': ['KIM ANN FOXMAN', 'ONIO', 'ULTRATHEM']}]}
{'event_detail_date': datetime.datetime(2022, 9, 3, 0, 0), 'event_detail_title': 'FAST FORWARD NIGHT', 'event_detail_music': [{'Basement': ['SUGAR', 'EZY', 'TT']}, {'Studio': ['SCHACKE', 'PEACHLYFE']}]}
{'event_detail_date': datetime.datetime(2022, 9, 2, 22, 0), 'event_detail_title': 'September 2

In [71]:
import copy

# Backup of the scraped records taken before cleaning.
# A deep copy is required: list.copy() would share the per-event dicts, so the
# in-place title/DJ clean-up applied to event_data_list would silently rewrite
# this backup as well.
event_data_list_storage = copy.deepcopy(event_data_list)

In [66]:
# Display the accumulated event records (the whole list is rendered as the cell output).
event_data_list

[{'event_detail_date': datetime.datetime(2025, 3, 8, 22, 30),
  'event_detail_title': 'March 8',
  'event_detail_music': [{'Basement': ['DJ BONE',
     'MIKE SERVITO',
     'ROSE KOURTS']},
   {'Studio': ['FKA.M4A', 'JASON KENDIG', 'COSMO']}]},
 {'event_detail_date': datetime.datetime(2025, 3, 7, 22, 30),
  'event_detail_title': 'BASSIANI NIGHT',
  'event_detail_music': [{'Basement': ['NEWA B2B NDRX',
     'KANCHELI B2B NEBBIERI']},
   {'Studio': ['GABRIELLE KWARTENG', '98DOTS', 'DJ SURGE']}]},
 {'event_detail_date': datetime.datetime(2025, 3, 1, 22, 30),
  'event_detail_title': 'ALTÆR 5 Year Anniversary',
  'event_detail_music': [{'Basement': ['LADY STARLIGHT LIVE',
     'STERAC',
     'AUSPEX']},
   {'Studio': ['DAVID ELIMELECH', 'LOREN', "S'AINT PANIC"]}]},
 {'event_detail_date': datetime.datetime(2025, 2, 28, 22, 30),
  'event_detail_title': 'X-TRA.SERVICES',
  'event_detail_music': [{'Basement': ['STEF MENDESIDIS LIVE', 'YANT', 'ÆON']},
   {'Studio': ['HAAI', 'K WATA', 'LYDO']}]},

In [93]:
import re

# Titles that consist only of a calendar date ("March 8", "September 16") are
# placeholders, not event names. The original rule only knew January/February,
# so other months were blanked only when the cell was hand-edited and re-run.
_MONTH_NAMES = (
    "january", "february", "march", "april", "may", "june", "july",
    "august", "september", "october", "november", "december",
)
_DATE_ONLY_TITLE = re.compile(
    r'^\s*(?:' + "|".join(_MONTH_NAMES) + r')\s+\d{1,2}(?:st|nd|rd|th)?\s*$',
    re.IGNORECASE,
)

for event in event_data_list:
    title = event.get("event_detail_title", "")
    # Original rule kept as is: any title mentioning January/February is blanked.
    mentions_jan_feb = re.search(r'\b(january|february)\b', title, re.IGNORECASE)
    if mentions_jan_feb or _DATE_ONLY_TITLE.match(title):
        event["event_detail_title"] = ""
    elif re.search(r'\bbasement', title, re.IGNORECASE):
        # Generic "Basement ..." titles are dropped unless they name an anniversary.
        if not re.search(r'\banniversary\b', title, re.IGNORECASE):
            event["event_detail_title"] = ""

# Ordered (pattern, canonical title) rules; the first pattern found in a title wins.
_TITLE_RULES = (
    (r"MORD", "MORD NIGHT"),
    (r"BASEMENT", "BASEMENT ANNIVERSARY"),
    (r"WRECKED", "WRECKED"),
    (r"BOUND", "BOUND BLACKOUT"),
    (r"L\.I\.E\.S\.", "L.I.E.S. NIGHT"),
    (r"ALTÆR", "ALTÆR"),
    (r"FIST", "FIST"),
)

for event in event_data_list:
    title = event.get("event_detail_title", "")
    canonical = next(
        (
            replacement
            for pattern, replacement in _TITLE_RULES
            if re.search(pattern, title, re.IGNORECASE)
        ),
        None,
    )
    # Titles matching no rule are left untouched.
    if canonical is not None:
        event["event_detail_title"] = canonical


# Transformation for event_detail_music
def _normalize_stage_djs(dj_list):
    """Return a cleaned, upper-cased copy of one stage's DJ list.

    Steps, in order:
      1. If the tokens "NE", "RE" and "A" all occur as separate entries they
         are merged into the single act "NE/RE/A"; every other entry is kept.
         (Previously the whole list was replaced, dropping the other DJs.)
      2. Entries containing "b2b" (any case) are split into all participants.
         (Previously only the first two parts were kept.)
      3. Entries containing " / " are split on that delimiter.
      4. Every name is upper-cased.
    """
    nerea_tokens = {"ne", "re", "a"}
    lowered = [dj.strip().lower() for dj in dj_list]
    if nerea_tokens.issubset(lowered):
        merged = []
        nerea_added = False
        for dj, low in zip(dj_list, lowered):
            if low not in nerea_tokens:
                merged.append(dj)
            elif not nerea_added:
                # Put the merged act where its first fragment stood.
                merged.append("NE/RE/A")
                nerea_added = True
        dj_list = merged

    after_b2b = []
    for dj in dj_list:
        if "b2b" in dj.lower():
            # Case-insensitive split; empty fragments are dropped.
            after_b2b.extend(
                part.strip() for part in re.split("(?i)b2b", dj) if part.strip()
            )
        else:
            after_b2b.append(dj)

    after_slash = []
    for dj in after_b2b:
        if " / " in dj:
            after_slash.extend(part.strip() for part in dj.split(" / ") if part.strip())
        else:
            after_slash.append(dj)

    return [dj.upper() for dj in after_slash]


for event in event_data_list:
    # Each entry of event_detail_music is a single-key {stage: [djs]} dict.
    for stage_dict in event["event_detail_music"]:
        for stage, dj_list in stage_dict.items():
            # Re-assigning an existing key during iteration is safe (dict size is unchanged).
            stage_dict[stage] = _normalize_stage_djs(dj_list)

# Manual corrections keyed by the stripped, upper-cased scraped name.
# The value is the list of names that replaces the entry; an empty list drops it.
_DJ_NAME_FIXES = {
    "ROI PEREZ ALL NIGHT LONG": ["ROI PEREZ"],
    "WRECKED ALL NIGHT LONG (RON LIKE HELL & RYAN SMITH)": ["RON LIKE HELL", "RYAN SMITH"],
    "VINCENT NEUMANN  JUSTIN CUDMORE": ["VINCENT NEUMANN", "JUSTIN CUDMORE"],
    "THE SPECTRUM:  NDRX": ["NDRX"],
    "FREDDY K OPEN TO CLOSE": ["FREDDY K"],
    "LINDO (JOTA": ["LINDO (JOTA B2B VICENTA)"],
    "VICENTA)": [],
    "WRECKED": [],
    "WORLD OF COMMAND": ["WORD OF COMMAND"],
    "VOLVOX FEATURING DOMI": ["VOLVOX"],
    "SHAUN J WRIGHT": ["SHAUN J. WRIGHT"],
    "PAUL FLEETWOOD (PERIMETER JUNK)": ["PAUL FLEETWOOD"],
    "NICOLO BERNARDI": ["NICOLÒ BERNARDI"],
    "KATIE REX WITH PANTERAH LIVE": ["KATIE REX"],
    "JACOB MEEHAN)": ["JACOB MEEHAN"],
    "GABRIELLA KWARTENG": ["GABRIELLE KWARTENG"],
    "AMX AKA THE AM": ["AMX"],
    "ALARICO LIVE": ["ALARICO"],
    "STEF MENDESIDIS LIVE": ["STEF MENDESIDIS"],
}

for event in event_data_list:
    for stage_dict in event["event_detail_music"]:
        for stage, dj_list in stage_dict.items():
            corrected = []
            for dj in dj_list:
                key = dj.strip()
                if key.lower() == "ne/re/a /".strip().lower():
                    # Stray trailing " /" left on the NE/RE/A entry.
                    corrected.append("NE/RE/A")
                else:
                    # Names without a correction are kept exactly as they were.
                    corrected.extend(_DJ_NAME_FIXES.get(key.upper(), [dj]))
            stage_dict[stage] = corrected

# Now event_data_list has been updated.
event_data_list

[{'event_detail_date': datetime.datetime(2025, 3, 8, 22, 30),
  'event_detail_title': '',
  'event_detail_music': [{'Basement': ['DJ BONE',
     'MIKE SERVITO',
     'ROSE KOURTS']},
   {'Studio': ['FKA.M4A', 'JASON KENDIG', 'COSMO']}]},
 {'event_detail_date': datetime.datetime(2025, 3, 7, 22, 30),
  'event_detail_title': 'BASSIANI NIGHT',
  'event_detail_music': [{'Basement': ['NEWA',
     'NDRX',
     'KANCHELI',
     'NEBBIERI']},
   {'Studio': ['GABRIELLE KWARTENG', '98DOTS', 'DJ SURGE']}]},
 {'event_detail_date': datetime.datetime(2025, 3, 1, 22, 30),
  'event_detail_title': 'ALTÆR',
  'event_detail_music': [{'Basement': ['LADY STARLIGHT LIVE',
     'STERAC',
     'AUSPEX']},
   {'Studio': ['DAVID ELIMELECH', 'LOREN', "S'AINT PANIC"]}]},
 {'event_detail_date': datetime.datetime(2025, 2, 28, 22, 30),
  'event_detail_title': 'X-TRA.SERVICES',
  'event_detail_music': [{'Basement': ['STEF MENDESIDIS', 'YANT', 'ÆON']},
   {'Studio': ['HAAI', 'K WATA', 'LYDO']}]},
 {'event_detail_date':

In [94]:
import pandas as pd

# Suppose event_data_list is already defined

# Flatten the nested records: one row per (event, stage, DJ) combination.
rows = [
    {
        "date": date,
        "event_title": title,
        "stage": stage,
        "DJ": dj
    }
    for date, title, music in (
        (event["event_detail_date"], event["event_detail_title"], event["event_detail_music"])
        for event in event_data_list
    )
    for stage_dict in music
    for stage, dj_list in stage_dict.items()
    for dj in dj_list
]

df = pd.DataFrame(rows)
print(df)

                    date     event_title     stage             DJ
0    2025-03-08 22:30:00                  Basement        DJ BONE
1    2025-03-08 22:30:00                  Basement   MIKE SERVITO
2    2025-03-08 22:30:00                  Basement    ROSE KOURTS
3    2025-03-08 22:30:00                    Studio        FKA.M4A
4    2025-03-08 22:30:00                    Studio   JASON KENDIG
...                  ...             ...       ...            ...
1443 2021-12-11 22:30:00  BOUND BLACKOUT  Basement  FALSE WITNESS
1444 2021-12-11 22:30:00  BOUND BLACKOUT  Basement      KATIE REX
1445 2021-12-11 22:30:00  BOUND BLACKOUT  Basement     NIGHT DOLL
1446 2021-12-04 22:30:00                  Basement           PERC
1447 2021-12-04 22:30:00                  Basement        ANTPUKE

[1448 rows x 4 columns]


In [64]:
# Explicit %pip magic (does not depend on automagic being enabled) installs into the
# running kernel's environment; pinned to the version recorded in this cell's output.
%pip install openpyxl==3.1.5

Defaulting to user installation because normal site-packages is not writeable
Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [95]:
# Write the flattened event table to an .xlsx file in the working directory, without the index column.
df.to_excel("event_data.xlsx", index=False)

In [115]:
def split_by_basement(file_path):
    """Split a UTF-8 text file into blocks separated by lines reading "BASEMENT".

    A line counts as a separator when, after stripping whitespace, it is
    exactly "BASEMENT". Separator lines are not part of any block. Each block
    is the concatenation of its lines with trailing whitespace removed;
    consecutive separators produce no empty block.

    Returns:
        list[str]: The blocks in file order.
    """
    blocks = []
    pending = []

    def flush():
        # Emit the lines gathered since the previous separator, if there are any.
        if pending:
            blocks.append("".join(pending).rstrip())
            pending.clear()

    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            if raw_line.strip() != "BASEMENT":
                pending.append(raw_line)
            else:
                flush()

    # Whatever follows the last separator forms the final block.
    flush()
    return blocks

# Example usage:
# The guard is true inside a notebook kernel as well, so `file_path` and `blocks`
# become notebook-level variables that later cells read.
if __name__ == "__main__":
    file_path = "basement_text.txt"  # Replace with your file path
    blocks = split_by_basement(file_path)
    # Print every block followed by a 40-dash rule for visual inspection.
    for i, block in enumerate(blocks, start=1):
        print(f"Block {i}:\n{block}\n{'-'*40}\n")


Block 1:
̸November 2021
RA Pick
'Wrecked: TYGAPAW / Ron Like Hell / Ryan Smith' flyer image
Sat, 20 Nov 2021
Wrecked: TYGAPAW / Ron Like Hell / Ryan Smith

̸
TYGAPAW, Ron Like Hell, Ryan Smith
New York City
----------------------------------------

Block 2:
787
Wrecked bosses Ron Like Hell and Ryan Smith invite the Jamaican-born, New York-based powerhouse to their long-running monthly party for a night of raw and relentless energy.
'Sonic Groove Night: Adam X / Frankie Bones / MAEDON / Realmz' flyer image
Sat, 13 Nov 2021
Sonic Groove Night: Adam X / Frankie Bones / MAEDON / Realmz

̸
Frankie Bones, MAEDON, Realmz
New York City
----------------------------------------

Block 3:
442
'Fist: Isabella / Loka / Miss Parker / Joselo b2b Fried Plátano / Word of Command b2b DJ Clone' flyer image
Sat, 6 Nov 2021
Fist: Isabella / Loka / Miss Parker / Joselo b2b Fried Plátano / Word of Command b2b DJ Clone

̸
Miss Parker, JOSELO, Word of Command, DJ Clone, DJ Clone
New York City
-----------------

In [125]:
import re

def parse_block(block):
    """Pull three fields out of one text block.

    Returns a list ``[date_line, ny_above, above_delim]`` where
      * ``date_line`` is the first line shaped like "Sat, 20 Nov 2021",
      * ``ny_above`` is the nearest non-empty line above the first
        "New York City" line,
      * ``above_delim`` is the nearest non-empty line above the first line
        that consists solely of the "̸" delimiter.
    Any field that cannot be found is ``None``. All lines are compared and
    returned with surrounding whitespace stripped.
    """
    stripped = [raw.strip() for raw in block.splitlines()]

    def first_index(predicate):
        # Position of the first line satisfying `predicate`, or None.
        return next((pos for pos, text in enumerate(stripped) if predicate(text)), None)

    def nearest_nonempty_above(pos):
        # Walk upwards from just above `pos` to the first non-blank line.
        if pos is None:
            return None
        for text in reversed(stripped[:pos]):
            if text != "":
                return text
        return None

    date_pattern = re.compile(r"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+\d{1,2}\s+\w+\s+\d{4}$")
    date_pos = first_index(date_pattern.match)
    date_line = stripped[date_pos] if date_pos is not None else None

    ny_above = nearest_nonempty_above(first_index(lambda text: text == "New York City"))
    above_delim = nearest_nonempty_above(first_index(lambda text: text == "̸"))

    return [date_line, ny_above, above_delim]

# Parse every block into [date_line, line above "New York City", line above the delimiter].
# The trailing [:-1] discards the result for the final block
# (presumably trailing non-event text in the file — TODO confirm).
parsed_blocks = [parse_block(block) for block in blocks][:-1]

# for i, elements in enumerate(parsed_blocks, start=1):
#     print(f"Block {i}:")
#     print(" Date:", elements[0])
#     print(" Above New York City:", elements[1])
#     print(" Above delimiter:", elements[2])
#     print("-" * 40)

parsed_blocks

[['Sat, 20 Nov 2021',
  'TYGAPAW, Ron Like Hell, Ryan Smith',
  'Wrecked: TYGAPAW / Ron Like Hell / Ryan Smith'],
 ['Sat, 13 Nov 2021',
  'Frankie Bones, MAEDON, Realmz',
  'Sonic Groove Night: Adam X / Frankie Bones / MAEDON / Realmz'],
 ['Sat, 6 Nov 2021',
  'Miss Parker, JOSELO, Word of Command, DJ Clone, DJ Clone',
  'Fist: Isabella / Loka / Miss Parker / Joselo b2b Fried Plátano / Word of Command b2b DJ Clone'],
 ['Sat, 23 Oct 2021',
  'D.Dan, Katie Rex, DAIYAH',
  'Bound Blackout: D.Dan / Katie Rex / Daiyah'],
 ['Fri, 22 Oct 2021', 'FJAAK, Umfang, Akua', 'FJAAK / Umfang / Akua'],
 ['Sat, 16 Oct 2021', 'Ron Like Hell, Ryan Smith', 'Wrecked'],
 ['Sat, 9 Oct 2021',
  'Sara Landry, Volvox, Juana',
  'Sara Landry / Volvox / Juana'],
 ['Sat, 2 Oct 2021', 'Function', 'Function'],
 ['Sat, 25 Sep 2021',
  'Honcho, Ron Like Hell, Ryan Smith',
  'Wrecked with Honcho'],
 ['Fri, 24 Sep 2021',
  'Robert Hood, Aurora Halal, FadeFace',
  'Robert Hood / Aurora Halal / Fadeface'],
 ['Fri, 17 Sep 2

In [129]:
# Convert each parsed triple into [datetime, list of artist names, title].
processed_data = [
    [
        parse_date(entry[0]),
        [name.strip() for name in entry[1].split(',')],
        entry[2],
    ]
    for entry in parsed_blocks
]
for item in processed_data:
    print(item)

[datetime.datetime(2021, 11, 20, 0, 0), ['TYGAPAW', 'Ron Like Hell', 'Ryan Smith'], 'Wrecked: TYGAPAW / Ron Like Hell / Ryan Smith']
[datetime.datetime(2021, 11, 13, 0, 0), ['Frankie Bones', 'MAEDON', 'Realmz'], 'Sonic Groove Night: Adam X / Frankie Bones / MAEDON / Realmz']
[datetime.datetime(2021, 11, 6, 0, 0), ['Miss Parker', 'JOSELO', 'Word of Command', 'DJ Clone', 'DJ Clone'], 'Fist: Isabella / Loka / Miss Parker / Joselo b2b Fried Plátano / Word of Command b2b DJ Clone']
[datetime.datetime(2021, 10, 23, 0, 0), ['D.Dan', 'Katie Rex', 'DAIYAH'], 'Bound Blackout: D.Dan / Katie Rex / Daiyah']
[datetime.datetime(2021, 10, 22, 0, 0), ['FJAAK', 'Umfang', 'Akua'], 'FJAAK / Umfang / Akua']
[datetime.datetime(2021, 10, 16, 0, 0), ['Ron Like Hell', 'Ryan Smith'], 'Wrecked']
[datetime.datetime(2021, 10, 9, 0, 0), ['Sara Landry', 'Volvox', 'Juana'], 'Sara Landry / Volvox / Juana']
[datetime.datetime(2021, 10, 2, 0, 0), ['Function'], 'Function']
[datetime.datetime(2021, 9, 25, 0, 0), ['Honcho'

In [135]:
def clean_parsed_blocks(blocks):
    """Drop postponed/cancelled events and map known series to a canonical title.

    Args:
        blocks: Iterable of ``[date, dj_list, title]`` records.

    Returns:
        A new list of new ``[date, dj_list, title]`` lists. The input records
        are not modified (previously their titles were overwritten in place,
        which silently changed the caller's data). The ``dj_list`` objects
        themselves are shared with the input, not copied.

    A record whose title contains "postponed" or "cancelled" (any case) is
    omitted. Otherwise the first keyword below that occurs in the lower-cased
    title decides the replacement; titles with no keyword are kept as they
    are. A ``None`` title is treated as an empty title and passed through.
    """
    # Ordered: the first keyword found in the title wins.
    recast_rules = (
        ("wrecked", "WRECKED"),
        ("bound", "BOUND BLACKOUT"),
        ("altær", "ALTÆR"),
        ("sonic groove", "SONIC GROOVE NIGHT"),
        ("fist", "FIST"),
        ("balance", "BALANCE"),
        ("bunker", "THE BUNKER"),
        ("quo", "QUO VADIS"),
        ("bite", "BITE NITE"),
        ("united", "UNITED WE STREAM"),
    )

    cleaned = []
    for block in blocks:
        record = list(block)  # work on a copy so the caller's record stays intact
        lower_title = (record[2] or "").lower()
        # Drop the block if it contains "postponed" or "cancelled"
        if "postponed" in lower_title or "cancelled" in lower_title:
            continue
        # Apply recasting rules
        for keyword, canonical in recast_rules:
            if keyword in lower_title:
                record[2] = canonical
                break
        cleaned.append(record)
    return cleaned

# Clean the processed records (drop postponed/cancelled, canonicalise series titles) and display them.
cleaned_blocks = clean_parsed_blocks(processed_data)
cleaned_blocks

[[datetime.datetime(2021, 11, 20, 0, 0),
  ['TYGAPAW', 'Ron Like Hell', 'Ryan Smith'],
  'WRECKED'],
 [datetime.datetime(2021, 11, 13, 0, 0),
  ['Frankie Bones', 'MAEDON', 'Realmz'],
  'SONIC GROOVE NIGHT'],
 [datetime.datetime(2021, 11, 6, 0, 0),
  ['Miss Parker', 'JOSELO', 'Word of Command', 'DJ Clone', 'DJ Clone'],
  'FIST'],
 [datetime.datetime(2021, 10, 23, 0, 0),
  ['D.Dan', 'Katie Rex', 'DAIYAH'],
  'BOUND BLACKOUT'],
 [datetime.datetime(2021, 10, 22, 0, 0),
  ['FJAAK', 'Umfang', 'Akua'],
  'FJAAK / Umfang / Akua'],
 [datetime.datetime(2021, 10, 16, 0, 0),
  ['Ron Like Hell', 'Ryan Smith'],
  'WRECKED'],
 [datetime.datetime(2021, 10, 9, 0, 0),
  ['Sara Landry', 'Volvox', 'Juana'],
  'Sara Landry / Volvox / Juana'],
 [datetime.datetime(2021, 10, 2, 0, 0), ['Function'], 'Function'],
 [datetime.datetime(2021, 9, 25, 0, 0),
  ['Honcho', 'Ron Like Hell', 'Ryan Smith'],
  'WRECKED'],
 [datetime.datetime(2021, 9, 24, 0, 0),
  ['Robert Hood', 'Aurora Halal', 'FadeFace'],
  'Robert Hood 

In [133]:
def create_event_dataframe(data):
    """Expand ``[date, dj_list, title]`` records into one DataFrame row per DJ.

    Every row has the columns "Date", "event_title", "stage" and "DJ"; the
    stage is always the constant "BASEMENT". Records with an empty DJ list
    contribute no rows.
    """
    rows = [
        {
            "Date": date_val,
            "event_title": event_title,
            "stage": "BASEMENT",
            "DJ": dj
        }
        # Read all three fields up front, one record at a time.
        for date_val, dj_list, event_title in (
            (record[0], record[1], record[2]) for record in data
        )
        for dj in dj_list
    ]
    return pd.DataFrame(rows)

# Build the per-DJ table from the cleaned records and export it.
xdf = create_event_dataframe(cleaned_blocks)
# NOTE(review): unlike the event_data.xlsx export this call does not pass index=False,
# so the row index is written as an extra column — confirm that is intended.
xdf.to_excel("event_data_x.xlsx")

In [None]:
from googlesearch import search
import pandas as pd

# List of DJ names
names = [
    'RON LIKE HELL', 'RYAN SMITH', 'WORD OF COMMAND', 'LYDO', 'DJ CLONE', 'MIKE SERVITO', 'KATIE REX', 
    'AUSPEX', 'VOLVOX', 'JUSTIN CUDMORE', 'JASON KENDIG', 'FUNCTION', 'ELLE DEE', 'THE CARRY NATION', 
    'UMFANG', 'KYRUH', 'JEK', 'HARUKA', 'DEE DIGGS', 'ANALOG SOUL', 'ROSE KOURTS', 'RACHEL NOON', 'NE/RE/A', 
    'MATAS', 'JUANA', 'TT', 'NDRX', 'LYCHEE', 'AKUA', 'YOUNG MALE', 'X3BUTTERFLY', 'SHAUN J. WRIGHT', 
    'SEKUCCI', 'SCHACKE', 'PARTOK', 'NEWA', 'COSMO', 'CLARISA KIMSKII', 'MADISON MOORE', 
    'KILOPATRAH JONES', 'JULIANA HUXTABLE', 'IAN CRANE', 'DEVOYE', 'DAIYAH', 'ANCIENT METHODS', 'ANA B', 
    'XIORRO', 'WTCHCRFT', 'PHASE FATALE', 'D.DAN', 'ANTHONY PARASOLE', 'ANNIE LEW', 'ÆON', 
    'VERONICA VASICKA', 'PERC', 'FIEDEL', 'COTTON', 'ANTENES', 'STERLING JUAN DIAZ', 
    'MASSIMILIANO PAGLIARA', 'JADALAREIGN', 'SISTER ZO', 'MEILGAARDEN', 'MANU MIRAN', 'KIM ANH', 
    'KEVIN AVIANCE', 'ROI PEREZ', 'NATUTA', 'MUTUALISM', 'MAKADSI', 'LAUREN FLAX', 'JACOB MEEHAN', 
    'ELI ESCOBAR', 'DJ SALIVA', 'CHRIS CRUSE'
]

# Look up the first search hit for "<name> soundcloud" for every DJ.
results = []

for name in names:
    query = f"{name} soundcloud"
    # next() with a default yields the placeholder when the search produces nothing.
    url = next(search(query, num_results=1, lang="en"), "No results found.")
    results.append({"DJ": name, "Soundcloud Link": url})

# Tabulate the links with a fixed column order
df = pd.DataFrame(results, columns=["DJ", "Soundcloud Link"])

# Display the DataFrame
print(df)


