In [204]:
import pandas as pd
import datetime as dt
import json
import os
from icalendar import Calendar, Event

def stringtodate(date, time, timezone):
    """Build a timezone-aware ``datetime`` from the table's text fields.

    Parameters
    ----------
    date : str
        Calendar day written as ``DD/MM/YYYY``.
    time : str
        Wall-clock time written as ``HH:MM:SS``.
    timezone : str
        Label whose characters 4-9 hold the UTC offset, e.g. ``"UTC -07:00"``.

    Returns
    -------
    datetime.datetime
        Aware datetime carrying the fixed offset taken from ``timezone``.
    """
    # Remove the colon so the offset reads like "-0700" before handing it to %z.
    utc_offset = timezone[4:10].replace(':', '')
    stamp = ''.join((date, ' ', time, utc_offset))
    return dt.datetime.strptime(stamp, "%d/%m/%Y %H:%M:%S%z")

def formatstring(date, time, timezone):
    """Render the talk's date/time fields as a ``YYYY-MM-DDTHH:MM:SS±HH:MM`` string.

    Parameters
    ----------
    date : str
        Calendar day written as ``DD/MM/YYYY``.
    time : str
        Wall-clock time written as ``HH:MM:SS``.
    timezone : str
        Label whose characters 4-9 hold the UTC offset, e.g. ``"UTC -07:00"``.

    Returns
    -------
    str
        The timestamp text, ending with the offset exactly as it appears in
        ``timezone`` (colon included).
    """
    moment = stringtodate(date, time, timezone)
    # The offset text is appended to the format itself, so it is emitted verbatim.
    pattern = "%Y-%m-%dT%H:%M:%S" + timezone[4:10]
    return moment.strftime(pattern)

In [189]:
# Load the talk table. Later cells read the columns Host, Title, Date,
# Start_Time, End_Time, Timezone, Speaker, Speaker_inst, Site, arXiv and
# Abstract from each row of this frame.
data = pd.read_csv('data.csv')

In [190]:
# Template for one talk page: a "+++"-delimited front-matter header followed
# by the abstract. The {placeholders} share their names with keys of the
# `parser` dict, so the template can be filled with
# base_string.format(**mydict).
# NOTE(review): publishDate is a fixed date in the past, presumably so pages
# are treated as already published — confirm.
base_string = \
'''+++
  host= "{host}"
  date = "{start_time}"
  expiryDate = "{end_time}"
  title = "{title}"
  speaker = "{speaker}"
  speaker_institution = "{speaker_inst}"
  talk_site = "{talk_site}"
  categories = [{categories}]

  publishDate = "2000-02-07T16:00:00-07:00"
+++

{abstract}'''

In [200]:
def _when(x, time_column):
    """Collect the (date, time, timezone) arguments for one of the row's time columns."""
    return x['Date'], x[time_column], x['Timezone']


def _arxiv_categories(x):
    """Turn the free-text arXiv cell into the body of a quoted list, e.g. '"DG","AP"'."""
    raw = x['arXiv']
    # Anything that is not a string (e.g. a missing cell) or is blank means "no categories".
    if not isinstance(raw, str) or raw.strip() == '':
        return ''
    codes = raw.upper().replace(' ', '').replace('MATH.', '').split(',')
    return '"' + '","'.join(codes) + '"'


def _markdown_path(x):
    """Build the output path talk/<Host>_<Speaker>_<Date>.md with every slash removed from the stem."""
    stem = x['Host'] + '_' + x['Speaker'].replace(' ', '_') + '_' + x['Date']
    return 'talk/' + stem.replace('/', '') + '.md'


# Maps each output field name to a function that derives it from one row of `data`.
parser = {
    'host': lambda x: x['Host'],
    'title': lambda x: x['Title'],
    # Text timestamps for the template...
    'start_time': lambda x: formatstring(*_when(x, 'Start_Time')),
    'end_time': lambda x: formatstring(*_when(x, 'End_Time')),
    # ...and the same instants as datetime objects.
    'start_time_as_date': lambda x: stringtodate(*_when(x, 'Start_Time')),
    'end_time_as_date': lambda x: stringtodate(*_when(x, 'End_Time')),
    'speaker': lambda x: x['Speaker'],
    'speaker_inst': lambda x: x['Speaker_inst'],
    'talk_site': lambda x: x['Site'],
    'categories': _arxiv_categories,
    'abstract': lambda x: x['Abstract'],
    'name': _markdown_path,
}

In [108]:
# Render one Markdown file per row of `data`: derive every template field
# through `parser`, then fill `base_string` and write it to the path stored
# under 'name'. Opening in 'w' mode overwrites an existing file of that name.
# (The write call had been commented out, which left the `with` statement
# without a body and made the whole cell fail to parse.)
for _idx, row in data.iterrows():
    mydict = {prop: func(row) for prop, func in parser.items()}
    with open(mydict['name'], 'w') as f:
        # Keys of mydict that the template does not mention are ignored by str.format.
        f.write(base_string.format(**mydict))

In [201]:
# Derive the field dict for every row without writing anything; when the loop
# finishes, `mydict` holds the fields of the last row of `data`.
for _idx, row in data.iterrows():
    mydict = {field: extract(row) for field, extract in parser.items()}

In [202]:
# Display the most recently built field dict as a spot check of the parser output.
mydict

{'host': 'Caltech',
 'title': 'Total curvature and the isoperimetric inequality',
 'start_time': '2020-04-07T16:00:00-07:00',
 'end_time': '2020-04-07T16:50:00-07:00',
 'start_time_as_date': datetime.datetime(2020, 4, 7, 16, 0, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))),
 'end_time_as_date': datetime.datetime(2020, 4, 7, 16, 50, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))),
 'speaker': 'Mohammad Ghomi',
 'speaker_inst': 'Georgia Tech',
 'talk_site': 'https://secure.math.ucla.edu/seminars/display.php?&id=834000',
 'categories': '"DG"',
 'abstract': 'The classical isoperimetric inequality states that in Euclidean space spheres provide enclosures of least perimeter for any given volume. According to the Cartan-Hadamard conjecture, this inequality may be generalized to spaces of nonpositive curvature. In this talk we discuss an approach to proving this conjecture via a comparison formula for the total curvature of level sets of functions on nonpositively curved ma

In [207]:
# Create an icalendar Calendar and set two properties on it by item assignment.
# NOTE(review): the dtstart value looks like a placeholder date — confirm.
# NOTE(review): RFC 5545 lists PRODID and VERSION as required calendar
# properties and neither is set here — confirm before exporting this calendar.
cal = Calendar()
cal['dtstart'] = '20050404T080000'
cal['summary'] = 'Open Math Seminars Calendar'

In [153]:
def clean_text(ot):
    """Parse a talk page (``+++``-delimited front matter plus abstract) into a dict.

    Parameters
    ----------
    ot : str
        Full text of a talk file: ``+++``, ``key = "value"`` lines, ``+++``,
        then the abstract.

    Returns
    -------
    dict
        One entry per ``key = value`` front-matter line, with a surrounding
        pair of quotes or list brackets removed from the value, plus:

        * ``'abstract'`` - the text after the front matter, stripped;
        * ``'categories'`` - rewritten as ``"Math.XX, Math.YY"``, or ``''``
          when the page lists no categories.

    Raises
    ------
    IndexError
        If ``ot`` contains no ``+++`` delimiter.
    """
    # maxsplit=2 keeps any "+++" that occurs inside the abstract intact.
    parts = ot.split('+++', 2)

    clean = {}
    for line in parts[1].splitlines():
        key, sep, value = line.partition('=')
        if not sep:
            continue  # blank line, or anything that is not "key = value"
        value = value.strip()
        # Only unwrap values that really are quoted strings or bracketed lists,
        # so unquoted values are not truncated.
        if len(value) >= 2 and value[0] + value[-1] in ('""', "''", '[]'):
            value = value[1:-1]
        clean[key.strip()] = value

    clean['abstract'] = parts[-1].strip()

    # Drop empty entries so an empty list does not turn into the bogus category "Math.".
    raw_categories = clean.get('categories', '').replace('"', '')
    codes = [code.strip() for code in raw_categories.split(',')]
    clean['categories'] = ', '.join('Math.' + code for code in codes if code)
    return clean

In [166]:
def _field(key):
    """Build a getter that returns ``x[key]`` from a cleaned front-matter dict."""
    return lambda x: x[key]


def _constant(value):
    """Build a getter that ignores its argument and always returns ``value``."""
    return lambda x: value


def _ddmmyyyy(x):
    """Rearrange the leading ``YYYY-MM-DD`` of ``x['date']`` into ``DD/MM/YYYY``."""
    iso = x['date']
    return '/'.join((iso[8:10], iso[5:7], iso[:4]))


# Maps each table column to a function that rebuilds its value from the dict
# returned by clean_text (the reverse direction of `parser`).
# NOTE(review): the Email value is a hard-coded personal address — confirm it
# should live in the notebook rather than in configuration.
inverse_parser = {
    'Timestamp': _constant('-1'),
    'Email': _constant('jaumededios@gmail.com'),
    'Title': _field('title'),
    'Speaker': _field('speaker'),
    'Speaker_inst': _field('speaker_institution'),
    'Host': _field('host'),
    'Abstract': _field('abstract'),
    'Site': _field('talk_site'),
    'arXiv': _field('categories'),
    'Date': _ddmmyyyy,
    # Characters 11-18 of the timestamps are the HH:MM:SS part.
    'Start_Time': lambda x: x['date'][11:19],
    'End_Time': lambda x: x['expiryDate'][11:19],
    # The last six characters of the timestamp are the UTC offset, e.g. "-07:00".
    'Timezone': lambda x: 'UTC ' + x['date'][-6:],
    'In_Charge': _constant(False),
}

In [175]:
# Convert every non-hidden file under old_talk/ back into a row with the same
# columns as `data`, then save the collected rows as old_talk_data.csv.
old_files = []
for filename in os.listdir('old_talk/'):
    if filename.startswith('.'):
        continue  # skip hidden entries
    with open('old_talk/' + filename) as f:
        ot = f.read()
    print(filename)
    ct = clean_text(ot)
    # One value per column of `data`, in column order.
    old_files.append([inverse_parser[column](ct) for column in data.columns])

old_talk_data = pd.DataFrame(old_files, columns=data.columns)
old_talk_data.to_csv('old_talk_data.csv')

UCLAShimizu.md
BerkeleyPawloski.md
PrincetonLi.md
SeymourStanford.md
CaltechGhomi.md
VANTAGEFite.md
VANTAGEZywina.md
UCLAHunyh.md
ColumbiaBray.md
VANTAGESutherland.md
VANTAGEBucur.md
BerkeleyYing.md
OSUIncerti.md
NCCChudnovsky.md
CaltechTAO.md
PrincetonBeryland.md
UCLA_Irina_15042020.md
SIUDobrinen.md
ColumbiaMihatsch.md
