
## Script to parse the html page: https://oyc.yale.edu/courses


In [1]:
# from html.parser import HTMLParser
from bs4 import BeautifulSoup
import urllib.request
from urllib.error import URLError, HTTPError
import pandas as pd
import csv
import pprint
import re
import os
from socket import timeout
import logging
import datetime
import subprocess

In [2]:
! conda env list
! conda list | grep urllib


# conda environments:
#
                         /home/connes-v/.platformio/penv
base                  *  /home/connes-v/anaconda3
deepnumpy                /home/connes-v/anaconda3/envs/deepnumpy
tensorflow_cpu           /home/connes-v/anaconda3/envs/tensorflow_cpu

urllib3                   1.24.2                   py37_0  


In [27]:
lengths = {}
def get_length(filename):
    if filename == "http://FILENAME.mp4":
        print("without video")
        return None
    if filename in lengths:
        return lengths[filename]
    try:
        result = subprocess.run(["ffprobe", "-v", "error", "-show_entries",
                                 "format=duration", "-of",
                                 "json", filename],
                                capture_output=True)
#         if result.stderr:
#             logging.warning(f"{filename}: {result.stderr}")
        duration = float(json.loads(result.stdout)["format"]["duration"])
    except Exception as e:
        print(filename)
        print("returncode:", result.returncode)
        print("stdout:", result.stdout)
        print("stderr:",  result.stderr)
        print(result)
        raise e
    lengths[filename] = datetime.timedelta(seconds=duration)
    return lengths[filename]

In [4]:
url = "https://oyc.yale.edu/courses"
try:
    html_page = urllib.request.urlopen(url, timeout=600) # timeout in second
except (URLError, HTTPError) as error:
    logging.error('Data not retrieved because %s\nURL: %s', error, url)
except timeout as error:
    logging.error('socket timed out %s- URL %s', e, url)
else:
    logging.info('Access successful.')
soup = BeautifulSoup(html_page)

table = soup.find(class_="views-table cols-5")
# python3 just use th.text
headers = [th.text.strip() for th in table.select("tr th")]
headers[0] = "Category"
headers[2] = "Serie Name"

series_df = pd.DataFrame(data=[[td.text.strip() for td in row.find_all("td") if row]\
                                                                for row in table.select("tr")][1:],
                         columns=headers)

links = table.find_all("td", class_="views-field views-field-field-course-number")
urls = ["https://oyc.yale.edu" + link.a["href"] for link in links]

series_df.insert(value=pd.Series(list(urls), 
                 index=series_df.index),
                 column="Url",
                 loc=5)
series_df["Corpus"] = "yaleocw"

In [5]:
data, index = [], []

for sid, row in series_df.iterrows():
    print(f"[{sid + 1}/{series_df.shape[0]}]{row['Course Number']}:{row['Serie Name']}")
    url = row["Url"]
    try:
        html_page = urllib.request.urlopen(url)
        index.append(sid)
    except (HTTPError, AttributeError) as e:
        print("HTTPError:", sid, url)
        continue
    soup = BeautifulSoup(html_page)
    
    # recover author field
    author_subpart = soup.find("div", class_="views-field views-field-field-about-the-professor")
    author_name = author_subpart.select_one("b").string[6:]
    author_description = " ".join([p.text for p in author_subpart.select("p") if p]).strip()
    author = dict(name=author_name, description=author_description)
    
    # recover description field
    description = soup.find("span", string='About the Course', class_="views-label views-label-body").parent
    description = " ".join([p.text for p in description.select("p") if p]).strip()
    
    # recover course_struct field
    course_struct = soup.find("span", string='Course Structure',
                              class_="views-label views-label-field-course-structure").parent
    course_struct = " ".join([p.text for p in course_struct.select("p") if p]).strip()
    
    # recover material_url field
    material_url = soup.find("span", string='Course Materials',
                             class_="views-label views-label-field-course-download-link").parent
    material_url = material_url.select_one("a")["href"]
    
    # recover sessions field
    session_number = soup.find_all("td", class_="views-field views-field-field-session-display-number")
    session_number = [td.text.strip() for td in session_number if td]

    session_link = soup.find_all("td", class_="views-field views-field-field-session-display-title")
    session_link = ["https://oyc.yale.edu" + link.a["href"] for link in session_link]
    session_title = soup.find_all("td", class_="views-field views-field-field-session-display-title")
    session_title = [td.text.strip() for td in session_title if td]
    sessions = [dict(title=t, url=u, number=n) 
                         for t, u, n in zip(session_title, session_link, session_number)]
    
    # build the Series
    data.append([author, description, sessions, material_url, course_struct])
    
# Add to the dataframe
columns = ["Author", "Description", "Sessions", "Material Url", "Course Structure"]

series_df = pd.concat([series_df, pd.DataFrame(data=data, columns=columns, index=index)],
                       axis=1,
                       sort=False)
series_df.index.name = "Sid"

[1/40]AFAM 162:African American History: From Emancipation to the Present
[2/40]AMST 246:Hemingway, Fitzgerald, Faulkner
[3/40]ASTR 160:Frontiers and Controversies in Astrophysics
[4/40]BENG 100:Frontiers of Biomedical Engineering
[5/40]CHEM 125a:Freshman Organic Chemistry I
[6/40]CHEM 125b:Freshman Organic Chemistry II
[7/40]CLCV 205:Introduction to Ancient Greek History
[8/40]EEB 122:Principles of Evolution, Ecology and Behavior
[9/40]ECON 252:Financial Markets (2008)
[10/40]ECON 252:Financial Markets (2011)
[11/40]ECON 251:Financial Theory
[12/40]ECON 159:Game Theory
[13/40]ENGL 300:Introduction to Theory of Literature
[14/40]ENGL 220:Milton
[15/40]ENGL 310:Modern Poetry
[16/40]ENGL 291:The American Novel Since 1945
[17/40]EVST 255:Environmental Politics and Law
[18/40]GG 140:The Atmosphere, the Ocean, and Environmental Change
[19/40]HIST 116:The American Revolution
[20/40]HIST 119:The Civil War and Reconstruction Era, 1845-1877
[21/40]HIST 210:The Early Middle Ages, 284–1000
[22/40

In [6]:
print(series_df.shape)
series_df.head(10)
# series_df.loc[courses_df["Course Number"] == "ASTR 160"]

(40, 12)


Unnamed: 0_level_0,Category,Course Number,Serie Name,Professor Name,Date,Url,Corpus,Author,Description,Sessions,Material Url,Course Structure
Sid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,African American Studies,AFAM 162,African American History: From Emancipation to...,Jonathan Holloway,Spring 2010,https://oyc.yale.edu/african-american-studies/...,yaleocw,"{'name': 'Professor Jonathan Holloway', 'descr...",The purpose of this course is to examine the A...,"[{'title': 'Dawn of Freedom', 'url': 'https://...",http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus twi..."
1,American Studies,AMST 246,"Hemingway, Fitzgerald, Faulkner",Wai Chee Dimock,Fall 2011,https://oyc.yale.edu/american-studies/amst-246,yaleocw,"{'name': 'Professor Wai Chee Dimock', 'descrip...","This course examines major works by Hemingway,...","[{'title': 'Introduction', 'url': 'https://oyc...",http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus two..."
2,Astronomy,ASTR 160,Frontiers and Controversies in Astrophysics,Charles Bailyn,Spring 2007,https://oyc.yale.edu/astronomy/astr-160,yaleocw,"{'name': 'Professor Charles Bailyn', 'descript...",This course focuses on three particularly inte...,"[{'title': 'Introduction', 'url': 'https://oyc...",http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus twi..."
3,Biomedical Engineering,BENG 100,Frontiers of Biomedical Engineering,W. Mark Saltzman,Spring 2008,https://oyc.yale.edu/biomedical-engineering/be...,yaleocw,"{'name': 'Professor W. Mark Saltzman', 'descri...",The course covers basic concepts of biomedical...,"[{'title': 'What Is Biomedical Engineering?', ...",http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus twi..."
4,Chemistry,CHEM 125a,Freshman Organic Chemistry I,J. Michael McBride,Fall 2008,https://oyc.yale.edu/chemistry/chem-125a,yaleocw,"{'name': 'Professor J. Michael McBride', 'desc...",This is the first semester in a two-semester i...,"[{'title': 'How Do You Know?', 'url': 'https:/...",http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus thr..."
5,Chemistry,CHEM 125b,Freshman Organic Chemistry II,J. Michael McBride,Spring 2011,https://oyc.yale.edu/chemistry/chem-125b,yaleocw,"{'name': 'Professor J. Michael McBride', 'desc...",This is a continuation of Freshman Organic Che...,[{'title': 'Mechanism: How Energies and Kineti...,http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus thr..."
6,Classics,CLCV 205,Introduction to Ancient Greek History,Donald Kagan,Fall 2007,https://oyc.yale.edu/classics/clcv-205,yaleocw,"{'name': 'Professor Donald Kagan', 'descriptio...",This is an introductory course in Greek histor...,"[{'title': 'Introduction', 'url': 'https://oyc...",http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus twi..."
7,Ecology and Evolutionary Biology,EEB 122,"Principles of Evolution, Ecology and Behavior",Stephen C. Stearns,Spring 2009,https://oyc.yale.edu/ecology-and-evolutionary-...,yaleocw,"{'name': 'Professor Stephen C. Stearns', 'desc...",This course presents the principles of evoluti...,[{'title': 'The Nature of Evolution: Selection...,http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus thr..."
8,Economics,ECON 252,Financial Markets (2008),Robert J. Shiller,Spring 2008,https://oyc.yale.edu/economics/econ-252-08,yaleocw,"{'name': 'Professor Robert J. Shiller', 'descr...",Financial institutions are a pillar of civiliz...,[{'title': 'Finance and Insurance as Powerful ...,http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus twi..."
9,Economics,ECON 252,Financial Markets (2011),Robert J. Shiller,Spring 2011,https://oyc.yale.edu/economics/econ-252,yaleocw,"{'name': 'Professor Robert J. Shiller', 'descr...","An overview of the ideas, methods, and institu...",[{'title': 'Introduction and What this Course ...,http://openmedia.yale.edu/cgi-bin/open_yale/me...,"This Yale College course, taught on campus twi..."


In [198]:
def create_row(sid, row, eid, d):
    periods, transcriptions, description, duration, video, resources = (None,) * 6
    try:
        url = d["url"]
        try:
            html_page = urllib.request.urlopen(url)
        except HTTPError as e:
            print("HTTPError:", eid, url)
        soup = BeautifulSoup(html_page)

        description = soup.find("div", 
                                class_="ds-1col node node-session node-promoted " +
                                       "view-mode-full node-published node-not-sticky " +
                                       "author-1 odd clearfix clearfix"
                               ).find_all("p")
        description = " ".join([p.text for p in description if p])

        resources = soup.find("aside", attrs={"aria-label": "Resources"})
        resources = [{"url": r["href"], "title": r["title"], "type": r["type"]} 
                             for r in resources.find_all("a")] if resources else None
        if soup.find("video"):
            video = {"url": soup.find("video").find("source")["src"],
                     "captions": soup.find("video").find("track")["src"]}
            duration = get_length(video["url"])
            selector = soup.find("td", class_="views-field " + "views-field-field-audio--file").a
            if selector :
                video["sound"] = selector["href"]
            selector = soup.find("td",
                                 class_="views-field " + "views-field-field-low-bandwidth-video-file").a
            if selector:
                video["low band"] = selector["href"]
            selector = soup.find("td",
                                 class_="views-field " + "views-field-field-high-bandwidth-video-file").a                 
            if selector:
                video["high band"] = selector["href"]

            tr = soup.find("h1", attrs={"id": "transcript-top"}).parent.find_all(string=True) 
            full_tr = "".join(tr).strip()
            match_list = list(re.finditer(r"(Chapter.*)\[(\d+\:\d+\:\d+)\]", full_tr))
            match_end = re.search(r"((?:\[end of transcripts?\])?\s*Back to Top)$", full_tr)
            has_chap = soup.find("h3", class_="label-above strong", string="Lecture Chapters")
            if has_chap:
                timestamps_a = []
                titles_a = [a.text for a in has_chap.parent.parent.find_all("a")]
                div_timestamps = has_chap.parent.parent.find_all("div", class_="views-field-field-chapter-start-time")
                for div in div_timestamps:
                    timestamps = datetime.timedelta(seconds=int("".join(div.strings)))
                    timestamps_a.append(timestamps)
                periods_a = list(zip(titles_a, timestamps_a))
            if match_list:
                periods_m = []
                transcriptions = []
                for i, m in enumerate(match_list):
                    chaptitle, timestamp = m.groups()
                    th, tm, ts = timestamp.split(":")
                    periods_m.append((chaptitle, datetime.timedelta(seconds=int(th)*3600 + int(tm)*60 + int(ts))))
                    b, e = m.span()
                    if i+1 < len(match_list):
                      transcriptions.append(full_tr[e:match_list[i+1].span()[0]].strip())
                    else:
                      transcriptions.append(full_tr[e:match_end.span()[0]].strip())
            if has_chap and match_list:
                bools_checks_t = [ta == tm for (ca, ta), (cm, tm) in zip(periods_a, periods_m)]
                bools_checks_c = [ca in cm for (ca, ta), (cm, tm) in zip(periods_a, periods_m)]
                if not all(bools_checks_t):
                    print(f"\t[{eid + 1}/{len(row['Sessions'])}]{d['number']}:{d['title']}\n\t{d['url']}")
                    print("\tWarning: Chapters timestamps not match with hyperlink")
                    print([(ta, tm) for (ca, ta), (cm, tm), b in zip(periods_a, periods_m, bools_checks_t) if not b])
                if not all(bools_checks_c):
                    print(f"\t[{eid + 1}/{len(row['Sessions'])}]{d['number']}:{d['title']}\n\t{d['url']}")
                    print("\tWarning: Chapters titles not match with hyperlink")
                    print([(ca, cm) for (ca, ta), (cm, tm), b in zip(periods_a, periods_m, bools_checks_c) if not b])
                periods = periods_m
            else:
                print(f"\t[{eid + 1}/{len(row['Sessions'])}]{d['number']}:{d['title']}\n\t{d['url']}")
                if match_list:
                    periods = periods_m
                    print("\tWarning: Chapters not verified with hyperlink")
                elif has_chap:
                    periods = periods_a
                    transcriptions = [" ".join([s.strip() for s in tr[3:-4]])]
                    print("\tWarning: Chapters not link with transcription")
                else: 
                    transcriptions = [" ".join([s.strip() for s in tr[3:-4]])]
                    print("\tWarning: Without chapters")
                transcriptions = transcriptions if transcriptions and not transcriptions == list(['']) else None

        if video is None:
            print(f"\t[{eid + 1}/{len(row['Sessions'])}]{d['number']}:{d['title']}\n\t{d['url']}")
            print("\tWarning: Without video")
        if transcriptions and not periods:
            periods = [(d["title"], pd.NaT)]
        return [sid, eid, row["Course Number"], d["number"], d["title"], 
                periods if periods else None,
                transcriptions, 
                url, description, duration, video, resources]
    except Exception as e:
        print(f"Sid : {sid} eid : {eid}")
        print(url)
        print(match_list)
        print(match_end)
        print(tr[-10:])
        print("Error:", e)
        raise(e)

In [199]:
data = []
chapreg = re.compile(r"(.*)\[(\d+\:\d+\:\d+)\]")
for sid, row in series_df.iterrows():
    print(f"[{sid + 1}/{series_df.shape[0]}]{row['Course Number']}:{row['Serie Name']}\n{row['Url']}")
    for eid, d in enumerate(row["Sessions"]):
        data.append(create_row(sid, row, eid, d))
columns = ["Sid", "N In Series", "Course Number", "Number In Series", "Title", "Chapters",
           "Transcriptions", "Url", "Description", "Duration", "Video", "Ressources"]
episodes_df = pd.DataFrame(data=data, columns=columns)
episodes_df.index.name = "Eid"
episodes_df["Corpus"] = "yaleocw"

[1/40]AFAM 162:African American History: From Emancipation to the Present
https://oyc.yale.edu/african-american-studies/afam-162
	[1/25]Lecture 1:Dawn of Freedom
	https://oyc.yale.edu/african-american-studies/afam-162/lecture-1
[("Frederick Douglass' Speech, Delivered to Abolitionist Friends in 1852", 'Chapter 1. Frederick Douglass’ Speech, Delivered to Abolitionist Friends in 1852 '), (' What Does It Mean to Be American?', 'Chapter 2. What does it mean to be American? ')]
	[2/25]Lecture 2:Dawn of Freedom (continued)
	https://oyc.yale.edu/african-american-studies/afam-162/lecture-2
[(' Frances Ellen Watkins Harper Poem: "Bury Me in a Free Land"', 'Chapter 2. Frances Ellen Watkins Harper Poem: “Bury Me in a Free Land” ')]
	[3/25]Lecture 3:Reconstruction
	https://oyc.yale.edu/african-american-studies/afam-162/lecture-3
[(" The Freedmen's Bureau", 'Chapter 6. The Freedmen’s Bureau ')]
	[4/25]Lecture 4:Reconstruction (continued)
	https://oyc.yale.edu/african-american-studies/afam-162/lectu

	[25/25]Lecture 25:Who Speaks for the Race? (continued)
	https://oyc.yale.edu/african-american-studies/afam-162/lecture-25
[(' How Is Race Used?', 'Chapter 3. How is Race Used? ')]
[2/40]AMST 246:Hemingway, Fitzgerald, Faulkner
https://oyc.yale.edu/american-studies/amst-246
	[2/25]Lecture 2:Hemingway's In Our Time
	https://oyc.yale.edu/american-studies/amst-246/lecture-2
[(datetime.timedelta(seconds=2150), datetime.timedelta(seconds=2151)), (datetime.timedelta(seconds=2922), datetime.timedelta(seconds=2923))]
	[3/25]Lecture 3:Hemingway's In Our Time, Part II
	https://oyc.yale.edu/american-studies/amst-246/lecture-3
[(datetime.timedelta(seconds=326), datetime.timedelta(seconds=324)), (datetime.timedelta(seconds=771), datetime.timedelta(seconds=769)), (datetime.timedelta(seconds=1512), datetime.timedelta(seconds=1510)), (datetime.timedelta(seconds=1699), datetime.timedelta(seconds=1677)), (datetime.timedelta(seconds=2293), datetime.timedelta(seconds=2280)), (datetime.timedelta(seconds=24

	[22/25]Lecture 22:Faulkner's Light in August
	https://oyc.yale.edu/american-studies/amst-246/lecture-22
[(datetime.timedelta(seconds=2359), datetime.timedelta(seconds=2299))]
	[23/25]Lecture 23:Faulkner's Light in August, Part II
	https://oyc.yale.edu/american-studies/amst-246/lecture-23
[(datetime.timedelta(seconds=270), datetime.timedelta(seconds=271))]
	[23/25]Lecture 23:Faulkner's Light in August, Part II
	https://oyc.yale.edu/american-studies/amst-246/lecture-23
[(' Political Theology of the Neighbor ', 'Chapter 2: \xa0Political Theology of the Neighbor '), (' Alternation between Joe Christmas and Lena Grove ', 'Chapter 7: \xa0Alternation between Joe Christmas and Lena Grove ')]
	[24/25]Lecture 24:Faulkner's Light in August, Part III
	https://oyc.yale.edu/american-studies/amst-246/lecture-24
[(' Under Duress: Joe Brown uses the Word “Nigger” ', 'Chapter 2: \xa0Under Duress: Joe Brown uses the Word “Nigger” '), (' The Two Faces of Hightower  ', 'Chapter 6: The Two Faces of Hightow

	[1/41]Lecture 1:How Do You Know?
	https://oyc.yale.edu/chemistry/chem-125a/lecture-1
[(" Bacon's Instauration: Experimentation over Philosophy ", 'Chapter 3. Bacon’s Instauration: Experimentation over Philosophy '), (" Atoms, Molecules, and Hooke's Law ", 'Chapter 5. Atoms, Molecules, and Hooke’s Law ')]
	[2/41]Lecture 2:Force Laws, Lewis Structures and Resonance
	https://oyc.yale.edu/chemistry/chem-125a/lecture-2
[(datetime.timedelta(seconds=1302), datetime.timedelta(seconds=1303))]
	[4/41]Lecture 4:Coping with Smallness and Scanning Probe Microscopy
	https://oyc.yale.edu/chemistry/chem-125a/lecture-4
[(' Scanning Probe Microscopy: Feeling out Electron Pairs ', 'Chapter 3. Scanning Probe Microscopy: Feeling out and Seeing Electron Pairs ')]
	[7/41]Lecture 7:Quantum Mechanical Kinetic Energy
	https://oyc.yale.edu/chemistry/chem-125a/lecture-7
[(datetime.timedelta(seconds=2005), datetime.timedelta(seconds=2004))]
	[9/41]Lecture 9:Chladni Figures and One-Electron Atoms
	https://oyc.yale

	[7/38]Lecture 7:Nucleophilic Substitution Tools - Stereochemistry, Rate Law, Substrate, Nucleophile, Leaving Group
	https://oyc.yale.edu/chemistry/chem-125b/lecture-7
[('"Proving" a Mechanism by Imagining and Disproving All the Alternatives', 'Chapter 1. “Proving” a Mechanism by Imagining and Disproving All the Alternatives ')]
	[10/38]Lecture 10:Cation Intermediates – Alkenes: Formation, Addition, and Stability
	https://oyc.yale.edu/chemistry/chem-125b/lecture-10
[(' "Electrophilic" Addition to Alkenes', 'Chapter 5. “Electrophilic” Addition to Alkenes ')]
	[17/38]Lecture 17:Alkynes; Conjugation in Allylic Intermediates and Dienes
	https://oyc.yale.edu/chemistry/chem-125b/lecture-17
[(' When Does Conjugation Matter?  Allylic Intermediates and Transition States', 'Chapter 3. When Does Conjugation Matter?\xa0 Allylic Intermediates and Transition States ')]
	[18/38]Lecture 18:Linear and Cyclic Conjugation Theory; 4n+2 Aromaticity
	https://oyc.yale.edu/chemistry/chem-125b/lecture-18
[(" B

	[2/38]Lecture 2:Basic Transmission Genetics
	https://oyc.yale.edu/ecology-and-evolutionary-biology/eeb-122/lecture-2
[(datetime.timedelta(seconds=128), datetime.timedelta(seconds=89))]
	[2/38]Lecture 2:Basic Transmission Genetics
	https://oyc.yale.edu/ecology-and-evolutionary-biology/eeb-122/lecture-2
[(" Mendel's Laws", 'Chapter 4. Mendel’s Laws ')]
	[5/38]Lecture 5:How Selection Changes the Genetic Composition of Population
	https://oyc.yale.edu/ecology-and-evolutionary-biology/eeb-122/lecture-5
[(datetime.timedelta(seconds=657), datetime.timedelta(seconds=656))]
	[7/38]Lecture 7:The Importance of Development in Evolution
	https://oyc.yale.edu/ecology-and-evolutionary-biology/eeb-122/lecture-7
[(' "Boxes" (Transcription Factors)', 'Chapter 5. “Boxes” (Transcription Factors) ')]
	[9/38]Lecture 9:The Evolution of Sex
	https://oyc.yale.edu/ecology-and-evolutionary-biology/eeb-122/lecture-9
[(" The Traditional View on Sex's Existence", 'Chapter 2. The Traditional View on Sex’s Existence

	[17/26]Exam 2:Midterm Exam 2
	https://oyc.yale.edu/economics/econ-252-11/exam-2
	[18/26]Lecture 16:Guest Speaker Laura Cha
	https://oyc.yale.edu/economics/econ-252-11/lecture-16
[(" China's Public Sector and Opportunities in Other Emerging Markets", 'Chapter 2. China’s Public Sector and Opportunities in Other Emerging Markets ')]
	[21/26]Lecture 19:Investment Banks
	https://oyc.yale.edu/economics/econ-252-11/lecture-19
[(' Fougner: From ECON 252 to Wall Street', 'Chapter 5. Founger: From ECON 252 to Wall Street ')]
	[22/26]Lecture 20:Professional Money Managers and Their Influence
	https://oyc.yale.edu/economics/econ-252-11/lecture-20
[(' Trusts – Providing the Opportunity to Care for Your Children', 'Chapter 6. Trusts - Providing the Opportunity to Care for Your Children ')]
	[25/26]Lecture 23:Finding Your Purpose in a World of Financial Capitalism
	https://oyc.yale.edu/economics/econ-252-11/lecture-23
[(" Hopelessness: Challenging Malthus's Dismal Law", 'Chapter 3. Hopelessness: Cha

	[5/26]Lecture 5:The Idea of the Autonomous Artwork
	https://oyc.yale.edu/english/engl-300/lecture-5
[(' Kant and Coleridge: The Good, the Agreeable, and the Beautiful', 'Chapter 3. Kant and Coleridge: the Good, the Agreeable, and the Beautiful '), (' Wimsatt and Beardsley: The Anatomy of the "Poem"', 'Chapter 4. Wimsatt and Beardsley: the Anatomy of the “Poem” ')]
	[6/26]Lecture 6:The New Criticism and Other Western Formalisms
	https://oyc.yale.edu/english/engl-300/lecture-6
[('Yeats\' "Lapis Lazuli" and Tony the Tow Truck', 'Chapter 1. Yeats’s “Lapis Lazuli” and Tony the Tow Truck '), (' Brooks and the "Implications of "Unity"', 'Chapter 5. Brooks and the “Implications of “Unity” ')]
	[8/26]Lecture 8:Semiotics and Structuralism
	https://oyc.yale.edu/english/engl-300/lecture-8
[(' "Langue" and "Parole," "Signified" and "Signifier"', 'Chapter 2. “Langue” and “Parole,” “Signified” and “Signifier” '), (' Example: The Red Stoplight', 'Chapter 4. Example: the Red Stoplight ')]
	[9/26]Lectu

	[8/23]Lecture 10:God and Mammon: The Wealth of Literary Memory
	https://oyc.yale.edu/english/engl-220/lecture-10
[(' “Paradise Lost”: A Literary Fantasy of Forgetfulness', 'Chapter 3. Paradise Lost: A Literary Fantasy of Forgetfulness '), (' Analyzing “Paradise Lost”', 'Chapter 5. Analyzing Paradise Lost ')]
	[9/23]Lecture 11:The Miltonic Simile
	https://oyc.yale.edu/english/engl-220/lecture-11
[('Introduction: Similes in “Paradise Lost”', 'Chapter 1. Introduction: Similes in Paradise Lost '), (" Similes in “Paradise Lost”:  Satan's Shield Compared to the Moon", 'Chapter 2. Similes in Paradise Lost: Satan’s Shield Compared to the Moon '), (" Similes in “Paradise Lost”:  Satan's Spear Compared to the Mast of a Ship", 'Chapter 3. Similes in Paradise Lost: Satan’s Spear Compared to the Mast of a Ship '), (' Similes in “Paradise Lost”:  Simile of the Leaves', 'Chapter 4. Similes in Paradise Lost: Simile of the Leaves '), (' Hartman and Fish: Theories of Similes in “Paradise Lost”', 'Chapt

	[26/26]Exam 1:Final Exam
	https://oyc.yale.edu/english/engl-310/exam-1
[16/40]ENGL 291:The American Novel Since 1945
https://oyc.yale.edu/english/engl-291
	[1/27]Lecture 1:Introductions
	https://oyc.yale.edu/english/engl-291/lecture-1
[(datetime.timedelta(seconds=1771), datetime.timedelta(seconds=1770)), (datetime.timedelta(seconds=2638), datetime.timedelta(seconds=2637))]
	[1/27]Lecture 1:Introductions
	https://oyc.yale.edu/english/engl-291/lecture-1
[(' Introduction to Richard Wright’s “Black Boy”: Autobiography and Editorial Influence', 'Chapter 4. Introduction to Richard Wright’s Black Boy: Autobiography and Editorial Influence '), (' Conclusions: “Black Boy” and Major Course Themes', 'Chapter 5. Conclusions: Black Boy and Major Course Themes ')]
	[2/27]Lecture 2:Richard Wright, Black Boy
	https://oyc.yale.edu/english/engl-291/lecture-2
[(' Decoding Meaning in Wright’s Descriptive “Catalogs”', 'Chapter 3. Decoding Meaning in Wrightís Descriptive ìCatalogsî ')]
	[4/27]Lecture 4:Fla

	[15/24]Lecture 15:The Tobacco Paradigm
	https://oyc.yale.edu/environmental-studies/evst-255/lecture-15
[("Tobacco's Legal Paradigm", 'Chapter 1. Legal Strategies to Regulate Tobacco Use '), (" Isn't Nicotine a Drug?", 'Chapter 5. Isn’t Nicotine a Drug? ')]
	[16/24]Lecture 16:Evolution of Tobacco Law
	https://oyc.yale.edu/environmental-studies/evst-255/lecture-16
[(" If It's a Drug, Where's the Benefit?", 'Chapter 3. If It’s a Drug, Where’s the Benefit? ')]
	[20/24]Lecture 20:Managing Coastal Resources in an Era of Climate Change
	https://oyc.yale.edu/environmental-studies/evst-255/lecture-20
[(' Thinking about Vulnerabilities with Principles of Ecology', 'Chapter 2. Thinking about Vulnerabilities With Principles of Ecology ')]
	[22/24]Lecture 22:Past and Future of Nuclear Power
	https://oyc.yale.edu/environmental-studies/evst-255/lecture-22
[(" Nuclear Energy's Legal and Regulatory Structures", 'Chapter 2. Nuclear Energy’s Legal and Regulatory Structures ')]
	[24/24]Lecture 24:Reflect

	[4/25]Lecture 4:"Ever at Variance and Foolishly Jealous": Intercolonial Relations
	https://oyc.yale.edu/history/hist-116/lecture-4
[(datetime.timedelta(seconds=172), datetime.timedelta(seconds=173)), (datetime.timedelta(seconds=704), datetime.timedelta(seconds=705)), (datetime.timedelta(seconds=1438), datetime.timedelta(seconds=1439)), (datetime.timedelta(seconds=1843), datetime.timedelta(seconds=1842))]
	[5/25]Lecture 5:Outraged Colonials: The Stamp Act Crisis
	https://oyc.yale.edu/history/hist-116/lecture-5
[(datetime.timedelta(seconds=572), datetime.timedelta(seconds=574)), (datetime.timedelta(seconds=1344), datetime.timedelta(seconds=1345)), (datetime.timedelta(seconds=1849), datetime.timedelta(seconds=1850))]
	[6/25]Lecture 6:Resistance or Rebellion? (Or, What the Heck is Happening in Boston?)
	https://oyc.yale.edu/history/hist-116/lecture-6
[(datetime.timedelta(seconds=227), datetime.timedelta(seconds=228)), (datetime.timedelta(seconds=611), datetime.timedelta(seconds=612)), (da

	[16/25]Lecture 16:The Importance of George Washington
	https://oyc.yale.edu/history/hist-116/lecture-16
[(datetime.timedelta(seconds=216), datetime.timedelta(seconds=217)), (datetime.timedelta(seconds=1507), datetime.timedelta(seconds=1509)), (datetime.timedelta(seconds=1813), datetime.timedelta(seconds=1814)), (datetime.timedelta(seconds=2588), datetime.timedelta(seconds=2589))]
	[16/25]Lecture 16:The Importance of George Washington
	https://oyc.yale.edu/history/hist-116/lecture-16
[(" Washington's Symbolic Gestures as Commander-in-Chief of a Republican Army", 'Chapter 5. Washington’s Symbolic Gestures as Commander-in-Chief of a Republican Army '), (" Washington's Legacy as a Leader", 'Chapter 6. Washington’s Legacy as a Leader ')]
	[17/25]Lecture 17:The Logic of a Campaign (or, How in the World Did We Win?)
	https://oyc.yale.edu/history/hist-116/lecture-17
[(datetime.timedelta(seconds=639), datetime.timedelta(seconds=640))]
	[18/25]Lecture 18:Fighting the Revolution: The Big Picture

	[6/28]Lecture 6:Expansion and Slavery: Legacies of the Mexican War and the Compromise of 1850
	https://oyc.yale.edu/history/hist-119/lecture-6
[("Douglass's July Fourth Speech", 'Chapter 1. Douglass’s July Fourth Speech '), (" A Shrinking South? The South's Stance on Slavery in the West", 'Chapter 4. A Shrinking South? The South’s Stance on Slavery in the West ')]
	[7/28]Lecture 7:"A Hell of a Storm": The Kansas-Nebraska Act and the Birth of the Republican Party, 1854-55
	https://oyc.yale.edu/history/hist-119/lecture-7
[(" Consequences of the Fugitive Slave Act and Uncle Tom's Cabin", 'Chapter 4. Consequences of the Fugitive Slave Act and\xa0Uncle Tom’s Cabin\xa0')]
	[8/28]Lecture 8:Dred Scott, Bleeding Kansas, and the Impending Crisis of the Union, 1855-58
	https://oyc.yale.edu/history/hist-119/lecture-8
[(" Fremont's Near-Victory and the Failure of the Lecompton Constitution", 'Chapter 4. Fremont’s Near-Victory and the Failure of the Lecompton Constitution ')]
	[9/28]Lecture 9:John 

	[28/28]Exam:Final Exam
	https://oyc.yale.edu/history/hist-119/exam
[21/40]HIST 210:The Early Middle Ages, 284–1000
https://oyc.yale.edu/history/hist-210
	[4/22]Lecture 4:The Christian Roman Empire
	https://oyc.yale.edu/history/hist-210/lecture-4
[(' Platonism', 'Chapter 7:Platonism ')]
	[6/22]Lecture 6:Transformation of the Roman Empire
	https://oyc.yale.edu/history/hist-210/lecture-6
[(datetime.timedelta(seconds=2310), datetime.timedelta(seconds=2335))]
	[7/22]Lecture 7:Barbarian Kingdoms
	https://oyc.yale.edu/history/hist-210/lecture-7
[(datetime.timedelta(seconds=2073), datetime.timedelta(seconds=2355))]
	[7/22]Lecture 7:Barbarian Kingdoms
	https://oyc.yale.edu/history/hist-210/lecture-7
[(' The Barbarian Tribes: Vandals, Moors, Angles, Saxons, and Visigoths', 'Chapter 6: The Burgundians and the Burgundian Code ')]
	[8/22]Lecture 8:Survival in the East
	https://oyc.yale.edu/history/hist-210/lecture-8
[(datetime.timedelta(seconds=312), datetime.timedelta(seconds=311)), (datetime.tim

[25/40]HIST 276:France Since 1871
https://oyc.yale.edu/history/hist-276
	[2/22]Lecture 3:Centralized State and Republic
	https://oyc.yale.edu/history/hist-276/lecture-3
[(' Comte de Chambord: The "Miracle Baby"', 'Chapter 3. Comte de Chambord: The “Miracle Baby” ')]
	[3/22]Lecture 5:The Waning of Religious Authority
	https://oyc.yale.edu/history/hist-276/lecture-5
[(' The Decline of "The Awful Thing": Anti-Clericalism and De-Christianization', 'Chapter 2. The Decline of ‘The Awful Thing’: Anti-Clericalism and De-Christianization ')]
	[5/22]Lecture 7:Mass Politics and the Political Challenge from the Left
	https://oyc.yale.edu/history/hist-276/lecture-7
[(' The Challenge of Reform Socialism: Brousse and the "Possibilists"', 'Chapter 2. The Challenge of Reform Socialism: Brousse and the ‘Possibilists’ ')]
	[6/22]Lecture 8:Dynamite Club: The Anarchists
	https://oyc.yale.edu/history/hist-276/lecture-8
[(" Henry's Deed: The Bomb and the Aftermath", 'Chapter 4. Henry’s Deed: The Bomb and the

	[2/26]Lecture 2:Vita Nuova
	https://oyc.yale.edu/italian-language-and-literature/ital-310/lecture-2
[('An Introduction to "Vita nuova" and Its Autobiographical Structure', 'Chapter 1. An Introduction to Vita nuova and Its Autobiographical Structure '), (' "Vita nuova" as a Preamble to the "Divine Comedy"', 'Chapter 7. Vita nuova as a Preamble to the Divine Comedy '), (" Remarks on Dante's Life; Question and Answer", 'Chapter 8. Remarks on Dante’s Life; Question and Answer ')]
	[3/26]Lecture 3:Inferno I, II, III, IV
	https://oyc.yale.edu/italian-language-and-literature/ital-310/lecture-3
[(" Canto IV: Into the Garden and Limbo's Fantastic Figures", 'Chapter 7. Canto IV: Into the Garden and Limbo’s Fantastic Figures ')]
	[4/26]Lecture 4:Inferno V, VI, VII
	https://oyc.yale.edu/italian-language-and-literature/ital-310/lecture-4
[(" Dante's Political Understanding", 'Chapter 7. Dante’s Political Understanding ')]
	[6/26]Lecture 6:Inferno XII, XIII, XV, XVI
	https://oyc.yale.edu/italian-la

	[17/24]Lecture 17:Population in Modern China
	https://oyc.yale.edu/molecular-cellular-and-developmental-biology/mcdb-150/lecture-17
[(' 1900s Population Explosion in China', 'Chapter 4. 1950s Population Explosion in China ')]
	[18/24]Lecture 18:Economic Impact of Population Growth
	https://oyc.yale.edu/molecular-cellular-and-developmental-biology/mcdb-150/lecture-18
[(' Urban v. Rural Fertility in China', 'Chapter 2. Urban vs. Rural Fertility in China ')]
	[22/24]Lecture 22:Media and the Fertility Transition in Developing Countries (Guest Lecture by William Ryerson)
	https://oyc.yale.edu/molecular-cellular-and-developmental-biology/mcdb-150/lecture-22
[(" Population Growth, Women's Rights and Economy", 'Chapter 2. Population Growth, Women’s Rights and Economy ')]
	[23/24]Lecture 23:Biology and History of Abortion
	https://oyc.yale.edu/molecular-cellular-and-developmental-biology/mcdb-150/lecture-23
[(" Fertilization as a Marker of Life's Beginning", 'Chapter 3. Fertilization as a Mark

[31/40]PHYS 200:Fundamentals of Physics I
https://oyc.yale.edu/physics/phys-200
	[3/24]Lecture 3:Newton's Laws of Motion
	https://oyc.yale.edu/physics/phys-200/lecture-3
[(" Introduction to Newton's Laws of Motion, 1st Law and Inertial Frames", 'Chapter 2. Introduction to Newton’s Laws of Motion, 1st Law and Inertial Frames '), (" Newton's Third Law", 'Chapter 5. Newton’s Third Law ')]
	[9/24]Lecture 10:Rotations, Part II: Parallel Axis Theorem
	https://oyc.yale.edu/physics/phys-200/lecture-10
[(datetime.timedelta(seconds=4149), datetime.timedelta(seconds=4148))]
	[9/24]Lecture 10:Rotations, Part II: Parallel Axis Theorem
	https://oyc.yale.edu/physics/phys-200/lecture-10
[(' For System of Masses: Derive KEtotal = 1/2 MV2 + 1/2 ICM2', 'Chapter 2. For System of Masses: Derive KEtotal = ½ MV2 + ½ ICM2 ')]
	[10/24]Lecture 11:Torque
	https://oyc.yale.edu/physics/phys-200/lecture-11
	[11/24]Exam 1:Midterm Exam
	https://oyc.yale.edu/physics/phys-200/exam-1
	[12/24]Lecture 12:Introduction to R

	[25/27]Lecture 24:Quantum Mechanics VI: Time-Dependent Schrödinger Equation
	https://oyc.yale.edu/physics/phys-201/lecture-24
[('The "Theory of Nearly Everything"', 'Chapter 1: The “Theory of Nearly Everything” ')]
	[27/27]Exam 2:Final Exam
	https://oyc.yale.edu/physics/phys-201/exam-2
[33/40]PLSC 270:Capitalism: Success, Crisis, and Reform
https://oyc.yale.edu/political-science/plsc-270
	[2/24]Lecture 2:Thomas Malthus and Inevitable Poverty
	https://oyc.yale.edu/political-science/plsc-270/lecture-2
[(" Gregory Clarke's Explorations", 'Chapter 4. Gregory Clarke’s Explorations ')]
	[3/24]Lecture 3:Counting the Fingers of Adam Smith's Invisible Hand
	https://oyc.yale.edu/political-science/plsc-270/lecture-3
[(" Smith and Smithism in Today's World, with Jim Alexander", 'Chapter 3. Smith and Smithism in Today’s World, with Jim Alexander ')]
	[10/24]Lecture 10:Guest Lecture by Richard Medley: Entrepreneurship in Business Information
	https://oyc.yale.edu/political-science/plsc-270/lecture-

	[13/24]Lecture 13:The Sovereign State: Hobbes, Leviathan
	https://oyc.yale.edu/political-science/plsc-114/lecture-13
[(" Hobbes' Skeptical View of Knowledge", 'Chapter 2. Hobbes’s Skeptical View of Knowledge ')]
	[14/24]Lecture 14:The Sovereign State: Hobbes, Leviathan
	https://oyc.yale.edu/political-science/plsc-114/lecture-14
[("Introduction: Hobbes' Theory of Sovereignty", 'Chapter 1. Introduction: Hobbes’s Theory of Sovereignty ')]
	[15/24]Lecture 15:Constitutional Government: Locke, Second Treatise (1-5)
	https://oyc.yale.edu/political-science/plsc-114/lecture-15
[(" John Locke's Theory of Natural Law", 'Chapter 2. John Locke’s Theory of Natural Law ')]
	[17/24]Lecture 17:Constitutional Government: Locke, Second Treatise (13-19)
	https://oyc.yale.edu/political-science/plsc-114/lecture-17
[("The Role of Executive Power in Locke's Theory of Government", 'Chapter 1. The Role of Executive Power in Locke’s Theory of Government '), (" Contrasting Rawls' Theory of Justice with Locke's T

	[12/23]Lecture 12:Public Health vs. Medical Models in Nutrition Change: Saving Lives One or a Million at a Time
	https://oyc.yale.edu/psychology/psyc-123/lecture-12
[(' Longitudinal Studies: Cause and Effect over Time  ', 'Chapter 7. Longitudinal Studies: Cause and Effect over Time ')]
	[15/23]Lecture 15:Economics, Nutrition and Health: Subsidies, Food Deserts and More
	https://oyc.yale.edu/psychology/psyc-123/lecture-15
[(' Creating Sustainable Practices; Conclusions on the Economics of Food  ', 'Chapter 6. Creating Sustainable Practices; Conclusions on the Economics of Food ')]
	[20/23]Lecture 20:Schools and Nutrition: Where Health and Politics Collide (Guest Lecture by Marlene B. Schwartz)
	https://oyc.yale.edu/psychology/psyc-123/lecture-20
[(" Who's Who in School Food", 'Chapter 3. Who’s Who in School Food ')]
	[21/23]Lecture 21:The Morphing of the Modern Diet (Guest Lecture by Brian Wansink)
	https://oyc.yale.edu/psychology/psyc-123/lecture-21
[(datetime.timedelta(seconds=2085),

	[9/25]Lecture 9:The Priestly Legacy: Cult and Sacrifice, Purity and Holiness in Leviticus and Numbers
	https://oyc.yale.edu/religious-studies/rlst-145/lecture-9
[(' Ritual Purification, Sacrifices and Offerings, and "Imitatio Dei"', 'Chapter 4. Ritual Purification, Sacrifices and Offerings, and\xa0Imitatio Dei\xa0')]
	[10/25]Lecture 10:Biblical Law: The Three Legal Corpora of JE (Exodus), P (Leviticus and Numbers) and D (Deuteronomy)
	https://oyc.yale.edu/religious-studies/rlst-145/lecture-10
[("The Initiation of God's Laws, Rules and Ordinances at Sinai", 'Chapter 1. The Initiation of God’s Laws, Rules and Ordinances at Sinai ')]
	[12/25]Exam 1:Midterm Exam
	https://oyc.yale.edu/religious-studies/rlst-145/exam-1
	[15/25]Lecture 14:The Deuteronomistic History: Response to Catastrophe (1 and 2 Kings)
	https://oyc.yale.edu/religious-studies/rlst-145/lecture-14
[(" The Separation of the Kingdom Following Solomon's Death", 'Chapter 3. The Separation of the Kingdom Following Solomon’s Deat

	[2/24]Lecture 2:Don Quixote, Part I: Front Matter and Chapters I-X
	https://oyc.yale.edu/spanish-and-portuguese/span-300/lecture-2
[('"Don Quixote" and Its Effect on Readers', 'Chapter 1. Don Quixote and Its Effect on Readers ')]
	[3/24]Lecture 3:Don Quixote, Part I: Chapters I-X (cont.)
	https://oyc.yale.edu/spanish-and-portuguese/span-300/lecture-3
[('Innovative Features in "Don Quixote"', 'Chapter 1. Innovative Features in Don Quixote '), (" Don Quixote's Particular Madness", 'Chapter 3. Don Quixote’s Particular Madness ')]
	[5/24]Lecture 5:Don Quixote, Part I: Chapters XI-XX (cont.)
	https://oyc.yale.edu/spanish-and-portuguese/span-300/lecture-5
[(" Juan Palomeque's Inn and Its Characters", 'Chapter 2. Juan Palomeque’s Inn and Its Characters '), (" Juan Palomeque's Inn as an Internal Emblem for the Novel", 'Chapter 4. Juan Palomeque’s Inn as an Internal Emblem for the Novel '), (" Cervantes's Notorious Errors", 'Chapter 5. Cervantes’s Notorious Errors ')]
	[6/24]Lecture 6:Don Quix

In [None]:
# match_list = create_row(1, series_df.loc[1], 4, series_df.loc[1, "Sessions"][4])

In [None]:
# [m.groups() for m in match_list], match_list

In [97]:
episodes_df.loc[lambda df: [not d for d in df["Chapters"]]]

Unnamed: 0_level_0,Sid,N In Series,Course Number,Number In Series,Title,Chapters,Transcriptions,Url,Description,Duration,Video,Ressources,Corpus
Eid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
58,2,8,ASTR 160,Exam 1,Midterm Exam 1,,,https://oyc.yale.edu/astronomy/astr-160/exam-1,Midterm Exam 1 covers Lectures 1 through 6.,NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
68,2,18,ASTR 160,Exam 2,Midterm Exam 2,,,https://oyc.yale.edu/astronomy/astr-160/exam-2,"Midterm Exam 2 covers Lectures 8 through 15, f...",NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
94,3,15,BENG 100,Exam 1,Midterm Exam,,,https://oyc.yale.edu/biomedical-engineering/be...,The midterm exam covers Lectures 1 through 15.,NaT,"{'url': 'http://FILENAME.mp4', 'captions': '',...",[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
105,3,26,BENG 100,Exam 2,Final Exam,,,https://oyc.yale.edu/biomedical-engineering/be...,The final exam was administered in class.,NaT,"{'url': 'http://FILENAME.mp4', 'captions': '',...",[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
116,4,10,CHEM 125a,Exam 1,Midterm Exam 1,,,https://oyc.yale.edu/chemistry/chem-125a/exam-1,Midterm Exam covers the first quarter of the c...,NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
126,4,20,CHEM 125a,Exam 2,Midterm Exam 2,,,https://oyc.yale.edu/chemistry/chem-125a/exam-2,This Midterm Exam covers the second quarter of...,NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
137,4,31,CHEM 125a,Exam 3,Midterm Exam 3,,,https://oyc.yale.edu/chemistry/chem-125a/exam-3,This Midterm Exam covers the third quarter of ...,NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
146,4,40,CHEM 125a,Exam 4,Final Exam,,,https://oyc.yale.edu/chemistry/chem-125a/exam-4,The Final Exam covers material from the entire...,NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
230,7,21,EEB 122,Exam 1,Midterm Exam 1,,,https://oyc.yale.edu/ecology-and-evolutionary-...,Midterm Exam 1 covers the first half of the co...,NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
246,7,37,EEB 122,Exam 2,Midterm Exam 2,,,https://oyc.yale.edu/ecology-and-evolutionary-...,Midterm Exam 2 covers the second half of the c...,NaT,,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw


In [105]:
episodes_df

Unnamed: 0_level_0,Sid,N In Series,Course Number,Number In Series,Title,Chapters,Transcriptions,Url,Description,Duration,Video,Ressources,Corpus
Eid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,0,AFAM 162,Lecture 1,Dawn of Freedom,"[(Chapter 1. Frederick Douglass’ Speech, Deliv...",[Professor Jonathan Holloway: \n“Fellow citize...,https://oyc.yale.edu/african-american-studies/...,Professor Holloway offers an introduction to t...,00:34:51.489000,{'url': 'http://openmedia.yale.edu/projects/co...,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
1,0,1,AFAM 162,Lecture 2,Dawn of Freedom (continued),[(Chapter 1. Introduction and Recap of Last Cl...,"[Professor Jonathan Holloway: Yesterday, Mond...",https://oyc.yale.edu/african-american-studies/...,"In this lecture, Professor Holloway gives a br...",00:40:56.183578,{'url': 'http://openmedia.yale.edu/projects/co...,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
2,0,2,AFAM 162,Lecture 3,Reconstruction,[(Chapter 1. Introduction: The Reconstruction ...,[Professor Jonathan Holloway: \n“I know the n...,https://oyc.yale.edu/african-american-studies/...,"Between 1865 and 1877, several plans were deve...",00:48:54.793278,{'url': 'http://openmedia.yale.edu/projects/co...,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
3,0,3,AFAM 162,Lecture 4,Reconstruction (continued),[(Chapter 1. Billie Holliday Song: Strange Fru...,[Professor Jonathan Holloway: I’m going to st...,https://oyc.yale.edu/african-american-studies/...,After the massive cultural shift that the Sout...,00:47:17.548111,{'url': 'http://openmedia.yale.edu/projects/co...,[{'url': 'https://oyc.yale.edu/sites/default/f...,yaleocw
4,0,4,AFAM 162,Lecture 5,"Uplift, Accommodation, and Assimilation","[(Chapter 1. Alexander Crummell , 00:00:00, 0:...",[Professor Jonathan Holloway: Let me begin th...,https://oyc.yale.edu/african-american-studies/...,"In the closing decades of the 1800s, African A...",00:43:58.413778,{'url': 'http://openmedia.yale.edu/projects/co...,,yaleocw
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1053,39,19,SPAN 300,Lecture 20,"Don Quixote, Part II: Chapters XXXVI-LIII (cont.)",[(Chapter 1. Revisiting Major Themes in Don Qu...,[Professor Roberto González Echevarría: What i...,https://oyc.yale.edu/spanish-and-portuguese/sp...,"According to González Echevarría, Don Quixote’...",00:56:53.124344,{'url': 'http://openmedia.yale.edu/projects/co...,,yaleocw
1054,39,20,SPAN 300,Lecture 21,"Don Quixote, Part II: Chapters LIV-LXX","[(Chapter 1. Improvisation, International Dime...",[Professor Roberto González Echevarría: There ...,https://oyc.yale.edu/spanish-and-portuguese/sp...,Three issues related to the impending end of t...,00:55:32.969067,{'url': 'http://openmedia.yale.edu/projects/co...,,yaleocw
1055,39,21,SPAN 300,Lecture 22,"Don Quixote, Part II: Chapters LIV-LXX (cont.)","[(Chapter 1. Episodes in Barcelona , 00:00:00,...","[Professor Roberto González Echevarría: To me,...",https://oyc.yale.edu/spanish-and-portuguese/sp...,"As we approach the end of the novel, Cervantes...",00:58:52.892878,{'url': 'http://openmedia.yale.edu/projects/co...,,yaleocw
1056,39,22,SPAN 300,Lecture 23,"Don Quixote, Part II: Chapters LXXI-LXXIV",[(Chapter 1. Humor; Cervantes’s Self Portraits...,[Professor Roberto González Echevarría: As we ...,https://oyc.yale.edu/spanish-and-portuguese/sp...,González Echevarría focuses on the end of the ...,00:59:37.150111,{'url': 'http://openmedia.yale.edu/projects/co...,,yaleocw


In [108]:
keep_df = episodes_df.dropna().loc[:, ["Sid", "Transcriptions", "Chapters"]]
chapters_df = []
for sid, epgroup in keep_df.groupby("Sid"):
    for eid, row in epgroup.iterrows():
        attrlist = list(zip(row["Transcriptions"], row["Chapters"]))
        for i, (t, c) in enumerate(attrlist):
            end_timestamp = attrlist[i + 1][1][1] if i < len(attrlist) - 1 else episodes_df.loc[eid].Duration
            print(end_timestamp, c[1])
            duration = end_timestamp - c[1]
            chapters_df.append([eid, sid, i, t, c[0], c[1], end_timestamp, duration])
chapters_df = pd.DataFrame(chapters_df,
                           columns=["Eid", "Sid", "PartN", "Text", "Title", "BeginTimestamp", "EndTimestamp", "Duration"])
chapters_df.index.name = "Cid"
chapters_df["Corpus"] = "yaleocw"

00:04:44 00:00:00


TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [None]:
chapters_df

In [None]:
# @Deprecated reserve for local usage

# series_df["Path"] = None
# episodes_df["Path"] = None
# chapters_df["Path"] = None

# for sid, srow in series_df.iterrows():
#     stag, sname = srow["Course Number"], srow["Serie Name"]
#     stag, sname = re.sub(r"[ /\\]+", r"_", stag), re.sub(r"[ /\\]+", r"_", sname)
#     dirname = os.path.join("data/YaleSeriesCorpus", f"{sid}-{stag}-{sname}")
#     if not os.path.exists(dirname): os.makedirs(dirname)
#     for eid, erow in episodes_df[episodes_df.Sid==sid].iterrows():
#         name = f"{eid}-{erow['N In Series']}-{erow['Title']}"
#         name = re.sub(r"[ /\\]+", r"_", name)
#         subdirname = os.path.join(dirname, name)
#         if not os.path.exists(subdirname): os.makedirs(subdirname)
#         if erow["Transcriptions"]: 
#             transcription = "".join(erow["Transcriptions"])
#             with open(os.path.join(subdirname, "rawtext"), "w") as f:
#                 f.write(transcription)
#         for cid, crow in chapters_df[chapters_df.Eid==eid].iterrows():
#             name = f"{cid}-{crow['PartN']}-{crow['Title']}"
#             name = re.sub(r"[ /\\]+", r"_", name)
#             subsubdirname = os.path.join(subdirname, name)
#             if not os.path.exists(subsubdirname): os.makedirs(subsubdirname)
#             with open(os.path.join(subsubdirname, "rawtext"), "w") as f:
#                 f.write(crow["Text"])

In [None]:
series_df.to_csv(path_or_buf="data/series.csv", sep='|')
episodes_df.to_csv(path_or_buf="data/episodes.csv", sep='|')
chapters_df.to_csv(path_or_buf="data/chapters.csv", sep='|')

series_df.to_json("data/series.json")
episodes_df.to_json("data/episodes.json")
chapters_df.to_json("data/chapters.json")

series_df.to_pickle("data/series.pickle")
episodes_df.to_pickle("data/episodes.pickle")
chapters_df.to_pickle("data/chapters.pickle")