# Script that compiles accepted submissions to SMPC 2019 from tsv file, does a little cleaning, and organises presentations by schedule

And sorry, this is in python 2.7

In [14]:
import collections
import os
from os import listdir, chdir,makedirs
import shutil
import io  
import sys  
import urllib
import csv
from pylatexenc.latexencode import utf8tolatex
from pylatexenc.latex2text import LatexNodes2Text

from time import strptime
from datetime import datetime
from datetime import date

from dumptruck import DumpTruck
from collections import OrderedDict
from collections import Counter
from collections import deque

# Definitions for processing Program Files

In [15]:
# Interpret tsv rows from accepted submission spreadsheet
# Rows translated into unordered dictionaries

# work is a row from tsv, with colums representing:
# 0 : Submission ID
# 1 : Title
# 2 : Abstract
# 3 : Authors (affiliations) list, seperated by ';' and contact author marked by '*'
# 4 : authors email addresses and with primary contact marked by '*'
# 5-6 : Keywords for topic of presentation
# 7 : Presentation status (string, 4 options)

def submission(work):
    ID = 0 #submission number
    title = '' # string
    abstract = '' # string
    authors = [] # ordered list of strings
    institutions = [] # orders list of strings same length as authors
    emails = [] # orders list of strings same length as authors
    contact = '' # string of primary contact email
    keywords = [] # list of subject keywords provided
    status = '' # string representing on of {presentation, poster, symposium, syposium talk}
    
    
    ID = int(work[0])
    title = work[1]
    abstract = work[2]

    contributors = work[3].split('(') 
    authors = []
    institutions = []
    person = contributors[0].strip()
    for i in range(1,len(contributors)-1):
        
        ins = contributors[i].split(')')
        institutions.append(ins[0])
        if len(ins)>1:
            if len(ins[1]) > len(ins[1].strip('*')):
                person+= '*'
            authors.append(person)
            person = ins[-1][2:].strip()
        else:
            authors.append(person)
    authors.append(person)
    
    ins = contributors[-1].split(')')
    institutions.append(ins[0]) #institutions.append(ins[0].split(';')) CHANGED: STRING NOT LIST OF INSTITUTIONS PER AUTHOR 
    
    addresses = work[4].split(';')
    if len(addresses)==1: 
        emails = addresses
        contact = emails[0].strip('*')
    else: 
        for addy in addresses:
            email = addy.strip()
            if email[-1]=='*':
                contact = email.strip('*')
                email = contact
            emails.append(email)

    status = work[7]
    keywords = work[5:6]
        
        
    entry = {'Submission': ID, 'Title': title, 'Abstract': abstract, 'Authors': authors,
             'Institutions': institutions,'Emails': emails, 'Contact': contact, 
             'Subjects': keywords, 'Status': status}
    
    return entry

In [16]:
# Style choice: Initials listed in names will not have terminal periods. 
def cleanInitials(name):
    iauthor = name.strip()
    iauthor = iauthor.split()
    fullName = ''
    for name in iauthor:
        name.strip('.')
        if len(name.split('.'))>1:
            ins = name.split('.')
            initials = ''
            for inits in ins:
                initials+=inits
            name = initials
        fullName+= ' ' + name
    fullName = fullName.strip()
    return fullName

# Encoding Programs files in Databases

In [None]:
chdir('./data')

In [25]:
# Gather all accepted submissions in database

DBname = 'WorksAccepted.db'
dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    print len(data)
    dt.drop()
else:
    print 'Generating new database'
    
# Load the tsv file
ABS_raw = []

with open('Accepted_Submissions.tsv') as tsvfile: # Corrections received before conference but improperly filed are still being implimented
  reader = csv.reader(tsvfile, delimiter='\t')
  for row in reader:
    ABS_raw.append(row)
    
# Instead of using names are listed per submission, we draw details from a cleaned authors list with standard affiliation formating
AuthorDets = []
with open('UniqueAuthors.txt') as tsvfile:
  reader = csv.reader(tsvfile, delimiter=';')
  for row in reader:
    # clean initials by removing periods
    fullName = cleanInitials(row[1])
    # Construct Last name first Author Index value
    A = fullName.split()
    a =  A[-1] + ','
    for j in range(len( A)-1):
        a += ' ' +  A[j]
    row[1] = fullName
    row[0] = a
    AuthorDets.append(row)


# enter talk submissions into database  
p = 1
while p < len(ABS_raw):
    work = ABS_raw[p]
    p+=1
    entry = submission(work)
    
    # Correct authors details from cleaned list
    # note sometimes strings behave badly. This spagetti alerts me of the inconsistencies while getting the job done.
    auths = entry['Authors']
    institutions = []
    authorIndex = []
    authors = []
    for i in range(len(auths)):
        ad = ['NA','Anon','nowhere']
        found = 'No'
        fullName = cleanInitials(auths[i].strip('*'))
        for aDets in AuthorDets:
            if fullName == aDets[1].strip():
                ad = aDets
                found = 'Yes'
        if found == 'No':
            pauthor =  fullName.split()
            for aDets in AuthorDets:
                iauthor = aDets[1].split()
                if iauthor[-1].lower() == pauthor[-1].lower():
                    if iauthor[0].lower() == pauthor[0].lower():
                        ad = aDets
                        found = 'Yes'
                        print found + ' outside ' + fullName + ' ' + aDets[1]
            if found == 'No':
                print found + ' record: ' + fullName
                ad[2] = entry['Institutions'][i]
                A = fullName.split()
                a =  A[-1] + ','
                for j in range(len( A)-1):
                    a += ' ' +  A[j]
                ad[1] = fullName   
                ad[0] = a
                #AuthorDets.append(ad)
                #print found + ' ' + auths[i] + ' ' + entry['Status'] + str(ad)
        
        if auths[i][-1] == '*':
            ad[1] +='*'
        authors.append(ad[1])
        institutions.append(ad[2])
        authorIndex.append(ad[0])
    
    entry['Institutions'] = institutions
    entry['AuthorIndex'] = authorIndex
    entry['Authors'] = authors
    
    dt.insert(entry)
    
data = dt.dump()
print len(data)



Clear existing database
362
No record: na
Yes outside Elizabeth H Margulis Elizabeth Margulis
No record: Julian Céspedes-Guevara
No record: Kelly Sierra
No record: Steven Vargas
No record: Lindsay Warrenburg
No record: Lindsay Warrenburg
No record: Tamar I Regev
No record: María Marchiano
No record: Birgitta Burger
No record: Kirk N Olsen
No record: Ji Chul Kim
No record: Pauline Larrouy-Maestri
No record: Pauline Larrouy-Maestri
No record: David Sears
No record: Ani Patel
No record: Dominique T Vuvan
No record: David Sears
No record: Emma B Greenspon
No record: Ed Large
No record: Birgitta Burger
No record: Caroline Palmer
No record: Michael Weiss
No record: Aimee E Battcock
No record: Caroline Palmer
No record: Tanushree Agrawal
No record: Chantal Carrilo
Yes outside Anna V Kasdan Anna Kasdan
Yes outside HYESOO YOO Hyesoo Yoo
No record: Elizabeth Margulis
No record: Benjamin Zendel
No record: Alexandre Celma Miralles
No record: Carlota Pagès
No record: Shantala Hegde
No record: Flore

# Reading the preliminary program spreadsheet for when presentations were happening
Includes some sketchy moves to get around the formatting choices in the prelim program
Preliminary program was kept here: https://docs.google.com/spreadsheets/d/1_pl7OrfTCMbm7LGcEGyyzTCdd-tDWigk75Egn7kJ5go/edit?usp=sharing

Sheets were downloaded as tsv files and then process below to extract details of timing, tracks, and submission numbers.
Note: posters were processed from a closely tied document that shifted successive sessions below, rather than beside the initial set. A small difference that simplified the extraction.

In [28]:
# Extract talk session times and titles on August 5th
# first extract session information, then fit in talk details, from matching submission numbers.
TalkSched_Aug5 = []
with open('190726 Preliminary Program - August 5th.tsv') as tsvfile:
  reader = csv.reader(tsvfile, delimiter='\t')
  for row in reader:
    TalkSched_Aug5.append(row)

DBname = 'Aug5_Sessions.db'

dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'


Date = TalkSched_Aug5[1][1]
Tracks = TalkSched_Aug5[2][1:]
Rooms = ['KC802','KC905/907','KC909','KC914']

# convert to database of unordered dictionaries and 
# add details from talks database
sessions = []
SessionSlotNumber = 0;
for i in range(15): # first part of document contains session titles
       
    if len(TalkSched_Aug5[i+3][-2])>0: # pull rows with sessions
        talks = TalkSched_Aug5[i+3]
        Time = talks[0]
        SessionSlotNumber+=1
        
        for j in range(4): # go through sessions
                sess = []
                sess.append(Date)
                sess.append(Time)
                sess.append(SessionSlotNumber)
                sess.append(Tracks[j])
                sess.append(j+1)
                sess.append(Rooms[j])
                sess.append((j+1)+4*(SessionSlotNumber-1))
                
                tWords = talks[j+1].split('-')
                if len(tWords)<2:
                    SessionCode = sessions[sess[-1]-5][-2]
                    SessionTitle = sessions[sess[-1]-5][-1] + ', continued'
                        
                else:
                    SessionCode = tWords[0].strip()
                    title = tWords[1]
                    if len(tWords)>2:
                        for k in range(2,len(tWords)):
                            title+= '-' + tWords[k]
                    SessionTitle = title.strip()
                # check if symposium
                sympCheck = SessionTitle.split(':')
                isSymposium = 0
                if sympCheck[0] == 'Symposium':
                    isSymposium = 1
                sess.append(SessionCode)
                sess.append(SessionTitle)

                #print sess
                sessions.append(sess)

                entry = {'Date': Date, 'Times': Time, 'SessionSlot': SessionSlotNumber, 
                         'Track': Tracks[j],'TrackNumber': j+1,'Room':Rooms[j], 
                         'SessionNumber': (j+1)+4*(SessionSlotNumber-1),'SessionCode': SessionCode,
                         'SessionTitle': SessionTitle, 'Symposium':isSymposium}
                dt.insert(entry)
                print entry['SessionTitle'] + ' ' + str(isSymposium)
   
data = dt.dump()
print len(data)



Generating new database
Beat & Meter 1 0
Crossing Cultures 0
Aging 0
Ensemble Performance 1 0
Beat & Meter 2 0
Timbre 1 0
Having Vision 0
Ensemble Performance 2: Improvisation 0
The Voice 1 0
Timbre 2 0
Personal Music Listening 1 0
Symposium: LIVELab Part 1 1
The Voice 2 0
The Listener 0
Personal Music Listening 2: Ethics 0
Symposium: LIVELab Part 2 1
Ensemble Performance 3: Synchronization 0
Harmony 1: Expectation 0
Facial Emotion 0
Symposium: Music Training and Executive Function 1
Aesthetic Responses 0
Development 0
Modeling the Brain 0
Symposium: Music-Evoked Autobiographical Memories 1
24


In [29]:
# now extract talk specifics from Schedule in order
# enter submissions by matching submission numbers.

DBname = 'Aug5_Talks.db'

dtTalks = DumpTruck(dbname='WorksAccepted.db')
DBname = 'Aug5_Talks.db'
dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'
    
    
# now extract talk specifics from Schedule
# enter submissions into database

# prep values for case 1
Date = TalkSched_Aug5[1][1]
Tracks = TalkSched_Aug5[2][1:]
Rooms = ['KC802','KC905/907','KC909','KC914']
talks = []
TalkTimeNumber = 0
TalkSessionNumber = 0
PresentationNumber = 1
SessionSlotNumber = 1
SessionTime = sessions[SessionSlotNumber][1]
SessionTimes = SessionTime.split('-')
SessionEnd=datetime.strptime(SessionTimes[1], "%I:%M %p") 
TimeNumber = 0

for i in range(21,44): # first part of document containing talk titles
    if len(TalkSched_Aug5[i][0])>0: # pull rows with talks
        presentations = TalkSched_Aug5[i]
        print presentations
        Time = presentations[0]
        dt2 = datetime.strptime(Time, "%I:%M %p")
        TimeNumber+=1
        if dt2 >= SessionEnd:
            SessionSlotNumber += 1
            TalkSessionNumber = 0
            SessionTime = sessions[SessionNumber+1][1]
            SessionTimes = SessionTime.split('-')
            SessionEnd=datetime.strptime(SessionTimes[1], "%I:%M %p")
        TalkSessionNumber += 1
        for j in range(4): # go through sessions
                PresentationNumber+=1
                if len(presentations[j+1])>0:
                    talk = []
                    talk.append(Date)
                    talk.append(Time)
                    talk.append(TimeNumber)
                    talk.append(Tracks[j])
                    talk.append(j+1)
                    talk.append(Rooms[j])
                    talk.append(SessionSlotNumber)
                    SessionNumber = (j+1)+4*(SessionSlotNumber-1)
                    talk.append(sessions[SessionNumber-1][-2])
                    talk.append(sessions[SessionNumber-1][-1])
                    talk.append(SessionNumber)
                    talk.append(TalkSessionNumber)
                    talk.append(PresentationNumber)

                    tWords = presentations[j+1].split('(')
                    
                    if len(tWords)>1:
                        SubmissionNumber = int(tWords[0].strip())

                        tWords = presentations[j+1].split(')')
                        tWords = tWords[0].split('(')
                        TalkPresentor = tWords[1].strip()

                        tWords = presentations[j+1].split(')')
                        title = tWords[1].strip()
                        if len(tWords)>2:
                            for k in range(2,len(tWords)):
                                title+= ')' + tWords[k]
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)
                        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
                        t = dtTalks.execute(a)

                        if len(t)>0:                    
                            entry = {'Date': Date, 'Time': Time, 'TalkSlot': TimeNumber, 'Track': Tracks[j],
                                     'TrackNumber': j+1,'Room':Rooms[j],'SessionSlot':SessionSlotNumber, 'SessionNumber': (j+1)+4*(SessionSlotNumber-1),
                                     'SessionCode': sessions[SessionNumber-1][-2],'SessionTitle': sessions[SessionNumber-1][-1],
                                     'PresentationNumber': PresentationNumber, 'TalkSessionNumber':TalkSessionNumber,'SubmissionNumber':SubmissionNumber, 
                                     'Status': t[0]['Status'],'Presentor':TalkPresentor,'TalkTitle': t[0]['Title'], 'Authors': t[0]['Authors'], 'Institutions': t[0]['Institutions'],
                                     'Abstract': t[0]['Abstract'],'Contact': t[0]['Contact'],'Subjects': t[0]['Subjects'],'AuthorIndex': t[0]['AuthorIndex']}
                            dt.insert(entry)
                            print entry['SubmissionNumber']
                            #print entry        
                    tWords = presentations[j+1].split()            
                    if tWords[-1].isdigit():
                        SubmissionNumber = int(tWords[-1])
                        TalkPresentor = 'Multi'
                        title = ''
                        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
                        t = dtTalks.execute(a)
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)
    
                        if len(t)>0:                    
                            entry = {'Date': Date, 'Time': Time, 'TalkSlot': TimeNumber, 'Track': Tracks[j],
                                     'TrackNumber': j+1,'Room':Rooms[j],'SessionSlot':SessionSlotNumber, 'SessionNumber': (j+1)+4*(SessionSlotNumber-1),
                                     'SessionCode': sessions[SessionNumber-1][-2],'SessionTitle': sessions[SessionNumber-1][-1],
                                     'PresentationNumber': PresentationNumber, 'TalkSessionNumber':TalkSessionNumber,'SubmissionNumber':SubmissionNumber, 
                                     'Status': t[0]['Status'],'Presentor':TalkPresentor,'TalkTitle':  t[0]['Title'], 'Authors': t[0]['Authors'], 'Institutions': t[0]['Institutions'],
                                     'Abstract': t[0]['Abstract'],'Contact': t[0]['Contact'],'Subjects': t[0]['Subjects'],'AuthorIndex': t[0]['AuthorIndex']}
                            dt.insert(entry)
                            print entry['SubmissionNumber']
                            #print entry
                    else:
                        SubmissionNumber = 0
                        TalkPresentor = ''
                        title = presentations[j+1]
                    
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)

        
data = dt.dump()
print len(data)

#print data[-1]


Generating new database
['9:30 AM', '296 (C. Miller-Rigoli) Recent experience effects in complex rhythm processing', '49 (E. Margulis) The Stories Music Tells: Cross-Cultural Narratives for Wordless Music', '299 (G. Barradas) Psychological Mechanisms underlying musical emotions in dementia', '279 ( C. Palmer) Role of ears, heads, and eyes in vocal duet performance']
296
49
299
279
['9:45 AM', '302 (P. Cariani) Recurrent timing nets for rhythmic expectancy', "76 (L. Heng) Timbre's role in communicating emotions between performers and listeners from Western art music and Chinese music cultures", '318 (A. Good) Group singing improves psychosocial wellbeing in older adults', '159 (A. Roman) Individual Musician\xe2\x80\x99s Spontaneous Performance Rates Affect Interpersonal Synchrony in Joint Musical Performance: A Dynamical Systems Model.']
302
76
318
159
['10:00 AM', '338 (K. Nave) Children synchronize their finger taps to rhythms through iterated reproduction', '101 (E. Beier) Similar ac

In [30]:
# Gather Files for August 6th Sessions

TalkSched_Aug6 = []

#with open('Static_SMPC2019_Program_August5thS.tsv') as tsvfile:
# with open('Preliminary_August_6th_talks.tsv') as tsvfile:
#with open('Talks_Prelim.txt') as tsvfile:
with open('190719 Preliminary Program - August 6th - Spoken.tsv') as tsvfile:
  reader = csv.reader(tsvfile, delimiter='\t')
  for row in reader:
    TalkSched_Aug6.append(row)
    #print row

DBname = 'Aug6_Sessions.db'

dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'


Date = TalkSched_Aug6[1][1]
Tracks = TalkSched_Aug6[2][1:]
Rooms = ['KC802','KC905/907','KC909','KC914']

# convert to database of unordered dictionaries and 
# add details from talks database
sessions = []
SessionSlotNumber = 0;
for i in range(15): # first part of document contains session titles
       
    if len(TalkSched_Aug6[i+3][2])>0: # pull rows with sessions
        talks = TalkSched_Aug6[i+3]
        # print talks
        Time = talks[0]
        SessionSlotNumber+=1
        
        for j in range(4): # go through sessions
                sess = []
                sess.append(Date)
                sess.append(Time)
                sess.append(SessionSlotNumber)
                sess.append(Tracks[j])
                sess.append(j+1)
                sess.append(Rooms[j])
                sess.append((j+1)+4*(SessionSlotNumber-1))
                
                tWords = talks[j+1].split('-')
                if len(tWords)<2:
                    SessionCode = sessions[sess[-1]-5][-2]
                    SessionTitle = sessions[sess[-1]-5][-1] + ', continued'
                        
                else:
                    SessionCode = tWords[0].strip()
                    title = tWords[1]
                    if len(tWords)>2:
                        for k in range(2,len(tWords)):
                            title+= '-' + tWords[k]
                    SessionTitle = title.strip()
                # check if symposium
                sympCheck = SessionTitle.split(':')
                isSymposium = 0
                if sympCheck[0] == 'Symposium':
                    isSymposium = 1
                sess.append(SessionCode)
                sess.append(SessionTitle)

                #print sess
                sessions.append(sess)

                entry = {'Date': Date, 'Times': Time, 'SessionSlot': SessionSlotNumber, 
                         'Track': Tracks[j],'TrackNumber': j+1,'Room':Rooms[j], 
                         'SessionNumber': (j+1)+4*(SessionSlotNumber-1),'SessionCode': SessionCode,
                         'SessionTitle': SessionTitle, 'Symposium':isSymposium}
                dt.insert(entry)
                print entry['SessionTitle'] + ' ' + str(isSymposium)     
   
data = dt.dump()
print len(data)
# print data

Clear existing database
Beat & Meter 3: Time 0
Harmony 2 0
Neuroscience 1 0
Effects of Music Training 0
Beat & Meter 4: Processing 0
Learning 0
Neuroscience 2 0
Absolutes 0
Memory 0
Melody 1: Topography 0
Embodiment 0
Symposium: Open Science Part 1 1
Music Training 2: Language 0
Methodology 0
Music Therapy 0
Symposium: Open Science Part 2 1
Social Interventions 0
Form 1 0
Medical Interventions 0
Melody 2 0
Mental Representations 0
Form 2: Closure 0
Music in the Hands 0
Beat & Meter 5: Non-Human Perspectives 0
24


In [31]:
# now extract talk specifics from Schedule of August 6th 
# enter submissions into database
DBname = 'Aug6_Talks.db'

dtSubmissions = DumpTruck(dbname='WorksAccepted.db')

dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'
    
    
Date = TalkSched_Aug6[1][1]
Tracks = TalkSched_Aug6[2][1:]
Rooms = ['KC802','KC905/907','KC909','KC914']
talks = []
TalkTimeNumber = 0
TalkSessionNumber = 0
PresentationNumber = 80
SessionSlotNumber = 1
SessionTime = sessions[SessionSlotNumber][1]
SessionTimes = SessionTime.split('-')
SessionEnd=datetime.strptime(SessionTimes[1], "%I:%M %p") 
TimeNumber = 20

# need condition to advance session slot number, likely need to use time...

for i in range(19,37): # first part of document containing talk titles
    #print TalkSched_Aug6[i]
    if len(TalkSched_Aug6[i][0])>0: # pull rows with talks
        presentations = TalkSched_Aug6[i]
        Time = presentations[0]
        dt2 = datetime.strptime(Time, "%I:%M %p")
        TimeNumber+=1
        if dt2 >= SessionEnd:
            SessionSlotNumber += 1
            TalkSessionNumber = 0
            SessionTime = sessions[SessionNumber+1][1]
            SessionTimes = SessionTime.split('-')
            SessionEnd=datetime.strptime(SessionTimes[1], "%I:%M %p")
        TalkSessionNumber += 1
        
        for j in range(4): # go through sessions
                PresentationNumber+=1
                if len(presentations[j+1])>0:
                    talk = []
                    talk.append(Date)
                    talk.append(Time)
                    talk.append(TimeNumber)
                    talk.append(Tracks[j])
                    talk.append(j+1)
                    talk.append(Rooms[j])
                    talk.append(SessionSlotNumber)
                    SessionNumber = (j+1)+4*(SessionSlotNumber-1)
                    talk.append(sessions[SessionNumber-1][-2])
                    talk.append(sessions[SessionNumber-1][-1])
                    talk.append(SessionNumber)
                    talk.append(TalkSessionNumber)
                    talk.append(PresentationNumber)

                    tWords = presentations[j+1].split('(')
                    
                    if len(tWords)>1:
                        SubmissionNumber = int(tWords[0].strip())

                        tWords = presentations[j+1].split(')')
                        tWords = tWords[0].split('(')
                        TalkPresentor = tWords[1].strip()

                        tWords = presentations[j+1].split(')')
                        title = tWords[1].strip()
                        if len(tWords)>2:
                            for k in range(2,len(tWords)):
                                title+= ')' + tWords[k]
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)
                        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
                        t = dtTalks.execute(a)

                        if len(t)>0:                    
                            entry = {'Date': Date, 'Time': Time, 'TalkSlot': TimeNumber, 'Track': Tracks[j],
                                     'TrackNumber': j+1,'Room':Rooms[j],'SessionSlot':SessionSlotNumber, 'SessionNumber': (j+1)+4*(SessionSlotNumber-1),
                                     'SessionCode': sessions[SessionNumber-1][-2],'SessionTitle': sessions[SessionNumber-1][-1],
                                     'PresentationNumber': PresentationNumber, 'TalkSessionNumber':TalkSessionNumber,'SubmissionNumber':SubmissionNumber, 
                                     'Status': t[0]['Status'],'Presentor':TalkPresentor,'TalkTitle':  t[0]['Title'], 'Authors': t[0]['Authors'], 'Institutions': t[0]['Institutions'],
                                     'Abstract': t[0]['Abstract'],'Contact': t[0]['Contact'],'Subjects': t[0]['Subjects'], 'AuthorIndex': t[0]['AuthorIndex']}
                            dt.insert(entry)
                            print str(entry['SubmissionNumber']) + ' : ' + t[0]['Title']
                            #print entry        
                    tWords = presentations[j+1].split()            
                    if tWords[-1].isdigit():
                        SubmissionNumber = int(tWords[-1])
                        TalkPresentor = 'Multi'
                        title = ''
                        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
                        t = dtTalks.execute(a)
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)
    
                        if len(t)>0:                    
                            entry = {'Date': Date, 'Time': Time, 'TalkSlot': TimeNumber, 'Track': Tracks[j],
                                     'TrackNumber': j+1,'Room':Rooms[j],'SessionSlot':SessionSlotNumber, 'SessionNumber': (j+1)+4*(SessionSlotNumber-1),
                                     'SessionCode': sessions[SessionNumber-1][-2],'SessionTitle': sessions[SessionNumber-1][-1],
                                     'PresentationNumber': PresentationNumber, 'TalkSessionNumber':TalkSessionNumber,'SubmissionNumber':SubmissionNumber, 
                                     'Status': t[0]['Status'],'Presentor':TalkPresentor,'TalkTitle':  t[0]['Title'], 'Authors': t[0]['Authors'], 'Institutions': t[0]['Institutions'],
                                     'Abstract': t[0]['Abstract'],'Contact': t[0]['Contact'],'Subjects': t[0]['Subjects'], 
                                     'AuthorIndex': t[0]['AuthorIndex']}
                            dt.insert(entry)
                            print str(entry['SubmissionNumber']) + ' : ' + t[0]['Title']
                            #print entry
                    else:
                        SubmissionNumber = 0
                        TalkPresentor = ''
                        title = presentations[j+1]
                    
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)

        
data = dt.dump()
print len(data)

Clear existing database
27 : Motown, Disco, and Drumming: The Effects of Beat Salience and Song Memory on Tempo Perception
45 : Harmonicity and Consonance Within an Unconventional Tuning System
275 : Prevalence of BDNF polymorphism in musicians: Evidence for compensatory motor learning strategies in music?
40 : Auditory processing abilities in formally trained and self-taught musicians
202 : Timing is Everything… or is it? Effects of Timing Style and Timing Reference on Drum-Kit Sound in Groove Performance
221 : Identifying prototypical harmonic progressions across (tertian) styles
199 : Enhanced subcortical responses of musicians to sounds presented on metrically strong beats
42 : Musical training and decision making ability: A resting-state amplitude of low frequency fluctuations (ALFF) study
215 : Time and Timelessness in 20th-Century Music: An Experimental Study
379 : Harmonic Grammar, Chord Frequency, and Database Structure
375 : Neural time-frequency characteristics of auditory a

In [32]:
# Gather Files for August 7th Sessions

TalkSched_Aug7 = []

with open('190719 Preliminary Program - August 7th - Spoken.tsv') as tsvfile:
#with open('Program_August_7th_Spoken.tsv') as tsvfile:
  reader = csv.reader(tsvfile, delimiter='\t')
  for row in reader:
    TalkSched_Aug7.append(row)
    #print row


DBname = 'Aug7_Sessions.db'

dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'


Date = TalkSched_Aug7[1][1]
Tracks = TalkSched_Aug7[2][1:]
Rooms = ['KC802','KC905/907','KC909','KC914']

# convert to database of unordered dictionaries and 
# add details from talks database
sessions = []
SessionSlotNumber = 0;
for i in range(13): # first part of document contains session titles
       
    if len(TalkSched_Aug7[i+3][-2])>0: # pull rows with sessions
        talks = TalkSched_Aug7[i+3]
        # print talks
        Time = talks[0]
        SessionSlotNumber+=1
        
        for j in range(4): # go through sessions
                sess = []
                sess.append(Date)
                sess.append(Time)
                sess.append(SessionSlotNumber)
                sess.append(Tracks[j])
                sess.append(j+1)
                sess.append(Rooms[j])
                sess.append((j+1)+4*(SessionSlotNumber-1))
                
                tWords = talks[j+1].split('-')
                if len(tWords)<2:
                    SessionCode = sessions[sess[-1]-5][-2]
                    SessionTitle = sessions[sess[-1]-5][-1] + ', continued'
                        
                else:
                    SessionCode = tWords[0].strip()
                    title = tWords[1]
                    if len(tWords)>2:
                        for k in range(2,len(tWords)):
                            title+= '-' + tWords[k]
                    SessionTitle = title.strip()
                # check if symposium
                sympCheck = SessionTitle.split(':')
                isSymposium = 0
                if sympCheck[0] == 'Symposium':
                    isSymposium = 1
                sess.append(SessionCode)
                sess.append(SessionTitle)

                #print sess
                sessions.append(sess)

                entry = {'Date': Date, 'Times': Time, 'SessionSlot': SessionSlotNumber, 
                         'Track': Tracks[j],'TrackNumber': j+1,'Room':Rooms[j], 
                         'SessionNumber': (j+1)+4*(SessionSlotNumber-1),'SessionCode': SessionCode,
                         'SessionTitle': SessionTitle, 'Symposium':isSymposium}
                dt.insert(entry)
                print entry['SessionTitle'] + ' ' + str(entry['SessionSlot'])    
   
data = dt.dump()
print len(data)
# print data

Clear existing database
Symposium: Everyday Music in Infancy 1
Beat & Meter 6: Syncopation 1
Speech 1
Symposium: Musical Expression in the Eye of the Beholder 1
Perceived Emotion 1 2
Modeling Performance 2
Dance 2
Symposium: The ACTOR Project Part 1 2
Perceived Emotion 2 3
Expert Performance 3
Development 2 3
Symposium: The ACTOR Project Part 2 3
12


In [33]:
# now extract talk specifics from Schedule for August 7th
# enter submissions into database
DBname = 'Aug7_Talks.db'

dtSubmissions = DumpTruck(dbname='WorksAccepted.db')

dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'
    
Date = TalkSched_Aug7[1][1]
Tracks = TalkSched_Aug7[2][1:]
Rooms = ['KC802','KC905/907','KC909','KC914']
talks = []
TalkTimeNumber = 0
TalkSessionNumber = 0
PresentationNumber = 144
SessionSlotNumber = 1
SessionTime = sessions[SessionSlotNumber][1]
SessionTimes = SessionTime.split('-')
SessionEnd=datetime.strptime(SessionTimes[1], "%I:%M %p") 
TimeNumber = 36

# need condition to advance session slot number, likely need to use time...

for i in range(17,31): # first part of document containing talk titles
    print TalkSched_Aug7[i]
    if len(TalkSched_Aug7[i][0])>0: # pull rows with talks
        presentations = TalkSched_Aug7[i]
        Time = presentations[0]
        dt2 = datetime.strptime(Time, "%I:%M %p")
        TimeNumber+=1
        if dt2 >= SessionEnd:
            SessionSlotNumber += 1
            TalkSessionNumber = 0
            SessionTime = sessions[SessionNumber+1][1]
            SessionTimes = SessionTime.split('-')
            SessionEnd=datetime.strptime(SessionTimes[1], "%I:%M %p")
        TalkSessionNumber += 1
        
        for j in range(4): # go through sessions
                PresentationNumber+=1
                if len(presentations[j+1])>0:
                    talk = []
                    talk.append(Date)
                    talk.append(Time)
                    talk.append(TimeNumber)
                    talk.append(Tracks[j])
                    talk.append(j+1)
                    talk.append(Rooms[j])
                    talk.append(SessionSlotNumber)
                    SessionNumber = (j+1)+4*(SessionSlotNumber-1)
                    talk.append(sessions[SessionNumber-1][-2])
                    talk.append(sessions[SessionNumber-1][-1])
                    talk.append(SessionNumber)
                    talk.append(TalkSessionNumber)
                    talk.append(PresentationNumber)

                    tWords = presentations[j+1].split('(')
                    
                    if len(tWords)>1:
                        SubmissionNumber = int(tWords[0].strip())

                        tWords = presentations[j+1].split(')')
                        tWords = tWords[0].split('(')
                        TalkPresentor = tWords[1].strip()

                        tWords = presentations[j+1].split(')')
                        title = tWords[1].strip()
                        if len(tWords)>2:
                            for k in range(2,len(tWords)):
                                title+= ')' + tWords[k]
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)
                        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
                        t = dtTalks.execute(a)

                        if len(t)>0:                    
                            entry = {'Date': Date, 'Time': Time, 'TalkSlot': TimeNumber, 'Track': Tracks[j],
                                     'TrackNumber': j+1,'Room':Rooms[j],'SessionSlot':SessionSlotNumber, 'SessionNumber': (j+1)+4*(SessionSlotNumber-1),
                                     'SessionCode': sessions[SessionNumber-1][-2],'SessionTitle': sessions[SessionNumber-1][-1],
                                     'PresentationNumber': PresentationNumber, 'TalkSessionNumber':TalkSessionNumber,'SubmissionNumber':SubmissionNumber, 
                                     'Status': t[0]['Status'], 'Presentor':TalkPresentor,'TalkTitle': t[0]['Title'], 'Authors': t[0]['Authors'], 'Institutions': t[0]['Institutions'],
                                     'Abstract': t[0]['Abstract'],'Contact': t[0]['Contact'],'Subjects': t[0]['Subjects'], 'AuthorIndex': t[0]['AuthorIndex']}
                            dt.insert(entry)
                            print entry['SubmissionNumber']
                            #print entry        
                    tWords = presentations[j+1].split()            
                    if tWords[-1].isdigit():
                        SubmissionNumber = int(tWords[-1])
                        TalkPresentor = 'Multi'
                        title = ''
                        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
                        t = dtTalks.execute(a)
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)
    
                        if len(t)>0:                    
                            entry = {'Date': Date, 'Time': Time, 'TalkSlot': TimeNumber, 'Track': Tracks[j],
                                     'TrackNumber': j+1,'Room':Rooms[j],'SessionSlot':SessionSlotNumber, 'SessionNumber': (j+1)+4*(SessionSlotNumber-1),
                                     'SessionCode': sessions[SessionNumber-1][-2],'SessionTitle': sessions[SessionNumber-1][-1],
                                     'PresentationNumber': PresentationNumber, 'TalkSessionNumber':TalkSessionNumber,'SubmissionNumber':SubmissionNumber,
                                     'Status': t[0]['Status'],'Presentor':TalkPresentor,'TalkTitle':  t[0]['Title'], 'Authors': t[0]['Authors'], 'Institutions': t[0]['Institutions'],
                                     'Abstract': t[0]['Abstract'],'Contact': t[0]['Contact'],'Subjects': t[0]['Subjects'], 'AuthorIndex': t[0]['AuthorIndex']}
                            dt.insert(entry)
                            print entry['SubmissionNumber']
                            #print entry
                    else:
                        SubmissionNumber = 0
                        TalkPresentor = ''
                        title = presentations[j+1]
                    
                        talk.append(SubmissionNumber)
                        talk.append(TalkPresentor)
                        talk.append(title)

                        talks.append(talk)

        
data = dt.dump()
print len(data)

print data[-1]

Generating new database
['9:30 AM', '250 (J. Mendoza et al.) Music in Infancy Symposium, 2501', '78 (N. Fram) Assessments of statistical measures of syncopation: Two approaches', '44 (A. Reed) Do Elements of Musicians\xe2\x80\x99 Speech Prosody Influence Their Created Vocal Melodies?', '150 (J. Vuoskoski et al.) Musical Expression in the Eye of the Beholder Symposium, 1501']
250
2501
78
44
150
1501
['9:45 AM', '2502', '206 (D. Temperley) Modeling Syncopation: Beyond Onset Pattern', '92 (M. Menon) Parsing ungrammatical sentences lead to preference for non-congruent musical pieces', '1502']
2502
206
92
1502
['10:00 AM', '2503', '235 (G. Sioros)The relation between groove and syncopation is intricate \xe2\x80\x93 not any pattern will do', '174 (N. Fisher) Is turn prediction accuracy across language and music dependent on the idiosyncrasies of one\xe2\x80\x99s own experience?', '1503']
2503
235
174
1503
['10:15 AM', '2504', '269 (F. Gouyon) Neural Resonance to Syncopated Rhythms: Model Pre

## Posters

In [34]:
# Poster Data for August 6th
Posters_Aug6 = []

#with open('Preliminary_August_6th_Posters.tsv') as tsvfile:
with open('190719 Preliminary Program - August 6th - Poster.tsv') as tsvfile:
  reader = csv.reader(tsvfile, delimiter='\t')
  for row in reader:
    Posters_Aug6.append(row)
# now extract talk specifics from Schedule
# enter submissions into database
DBname = 'Aug6_Posters.db'

dtTalks = DumpTruck(dbname='WorksAccepted.db')

dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'

Date = Posters_Aug6[1][1]
PosterSessionCode = 'P1'
PosterSessionNumber = 1
Location = 'Rosenthal Pavilion'
PosterNumber = 0
PositionNumber = -1
SessionEnd=datetime.strptime('4:45 PM', "%I:%M %p") 
TimeNumber = 0

# need condition to advance session slot number, likely need to use time...

for i in range(20,100): # first part of document with posters
    if len(Posters_Aug6[i][0])>0: # pull rows with posters
        Pentry = Posters_Aug6[i]
        PositionNumber += 2
        PosterNumber += 1
        Time = Pentry[4].split('-')
        dt2 = datetime.strptime(Time[1], "%I:%M %p")
        if dt2 > SessionEnd:
            PosterSessionNumber += 1
            PosterSessionCode = 'P'+str(PosterSessionNumber)
            PositionNumber = 2
            SessionEnd=dt2
            
        SubmissionNumber = int(Pentry[0])
        Time = Pentry[4]
        
        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
        t = dtTalks.execute(a)
        #entry = {'Submission': ID, 'Title': title, 'Abstract': abstract, 'Authors': authors,'Institutions': institutions,'Emails': emails, 'Contact': contact, 'Subjects': keywords, 'Status': status, 'Bio': bio}
                    
        entry = {'Date': Date, 'Time': Time, 'PosterSession': PosterSessionCode, 'PosterSessionNumber': PosterSessionNumber,
                     'Room':Location,'PositionNumber': PositionNumber, 'PosterNumber':PosterNumber,
                     'SubmissionNumber':SubmissionNumber,'PosterTitle': t[0]['Title'], 'Authors': t[0]['Authors'], 
                     'Institutions': t[0]['Institutions'],'Abstract': t[0]['Abstract'],
                     'Contact': t[0]['Contact'],'Status': t[0]['Status'],'Subjects': t[0]['Subjects'],'AuthorIndex': t[0]['AuthorIndex']}
        if entry['SubmissionNumber'] != 364:
            dt.insert(entry)
 
data = dt.dump()
print len(data)

print data[0]['PositionNumber']
print data[1]['PositionNumber']
print data[-2]['PositionNumber']
print data[-1]['PositionNumber']

Generating new database
79
1
3
78
80


In [35]:
# Poster Data for August 7th
Posters_Aug7 = []

with open('190719 Preliminary Program - August 7th - Poster.tsv') as tsvfile:
#with open('Preliminary_August_7th_Posters.tsv') as tsvfile:
  reader = csv.reader(tsvfile, delimiter='\t')
  for row in reader:
    Posters_Aug7.append(row)
    #print row
# now extract talk specifics from Schedule
# enter submissions into database
DBname = 'Aug7_Posters.db'

dtTalks = DumpTruck(dbname='WorksAccepted.db')

dt = DumpTruck(dbname=DBname)
if 'dumptruck' in dt.tables():
    data = dt.dump()
    print 'Clear existing database'
    dt.drop()
else:
    print 'Generating new database'

Date = Posters_Aug7[1][1]
PosterSessionCode = 'P3'
PosterSessionNumber = 3
Location = 'Rosenthal'
PosterNumber = 80
PositionNumber = -1
SessionEnd=datetime.strptime('11:45 AM', "%I:%M %p") 
TimeNumber = 0

# need condition to advance session slot number, likely need to use time...

for i in range(17,100): # first part of document with posters
    if len(Posters_Aug7[i][0])>0: # pull rows with posters
        Pentry = Posters_Aug7[i]
        PositionNumber += 2
        PosterNumber += 1
        Time = Pentry[4].split('-')
        dt2 = datetime.strptime(Time[1], "%I:%M %p")
        if dt2 > SessionEnd:
            PosterSessionNumber += 1
            PosterSessionCode = 'P'+str(PosterSessionNumber)
            PositionNumber = 2
            SessionEnd=dt2
            
        SubmissionNumber = int(Pentry[0])
        Time = Pentry[4]
        
        a = 'SELECT * FROM `dumptruck` WHERE `Submission` = ' + str(SubmissionNumber)                                    
        t = dtTalks.execute(a)
        #entry = {'Submission': ID, 'Title': title, 'Abstract': abstract, 'Authors': authors,'Institutions': institutions,'Emails': emails, 'Contact': contact, 'Subjects': keywords, 'Status': status, 'Bio': bio}
                    
        entry = {'Date': Date, 'Time': Time, 'PosterSession': PosterSessionCode, 'PosterSessionNumber': PosterSessionNumber,
                     'Room':Location,'PositionNumber': PositionNumber, 'PosterNumber':PosterNumber,
                     'SubmissionNumber':SubmissionNumber,'PosterTitle': t[0]['Title'], 'Authors': t[0]['Authors'], 
                     'Institutions': t[0]['Institutions'],'Abstract': t[0]['Abstract'],
                     'Contact': t[0]['Contact'],'Status': t[0]['Status'],'Subjects': t[0]['Subjects'],'AuthorIndex': t[0]['AuthorIndex']}
        dt.insert(entry)
 
data = dt.dump()
print len(data)

print data[0]['PositionNumber']
print data[1]['PositionNumber']
print data[-2]['PositionNumber']
print data[-1]['PositionNumber']

# print data[0]['PosterNumber']
# print data[1]['PosterNumber']
# print data[-2]['PosterNumber']
# print data[-1]['PosterNumber']

Generating new database
82
1
3
78
80


# make author list

In [36]:
# Steps to compile unique author lists, to be corrected and used above. 
# Currently commented out of efficient functionality to avoid errasing anything important


Authorslist = []
# make a list of author-affiliation pairs, sort by author, make unique by author
author = ['Lastname, First', 'First Last','Affiliations;Affiliations']

DBname = 'WorksAccepted.db'
dt = DumpTruck(dbname=DBname)
data = dt.dump()
# print len(data)

# print data[0]

# fileName = "authorlist.txv"
# f= open(fileName, 'w+')

AuDets = []

for work in data:
    k = len(work['Authors'])
    for i in range(k):
        autEntry  = ''
        nameSplit = work['Authors'][i].strip('*').split(' ')
        a = nameSplit[-1] + ','
        for m in range(len(nameSplit)-1):
            a+= ' '+ nameSplit[m]
            
        insts = work['Institutions'][i]
        autEntry = [a,work['Authors'][i].strip('*'), insts]
        Authorslist.append(autEntry)
        autDets = a + ';'+ work['Authors'][i].strip('*') + ';' + insts
        AuDets.append(autDets)
#         f.write(autDets) # doesn't work because of unicode limites of f.write
# f.close()

# now remove redundancies and correct inconsistencies in affiliations



In [37]:
AllAuthors = set()

for w in  AuDets:
    if w not in AllAuthors:
        AllAuthors.add(w)
        #print w
        
#print len(AllAuthors)


In [38]:
# test Authors list
AuthorDets = []

with open('UniqueAuthors.txt') as tsvfile:
  reader = csv.reader(tsvfile, delimiter=';')
  for row in reader:
    AuthorDets.append(row)
    
a = 'Adriana Zekveld'
# find the author
for aDets in AuthorDets:
    if a == aDets[1]:
        ad = aDets
print ad

['Zekveld, Adriana', 'Adriana Zekveld', 'Vu Medical Center']
