# HESS 3-335

In [132]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

%matplotlib inline

In [133]:
# Load the raw Hess catalogue listing: one 'title' string per work.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column, which looks
# like a previously saved index — confirm whether it should be kept.
df = pd.read_csv('Hess_3_335.csv')

In [134]:
# Preview the table exactly as loaded, before any parsing of the titles.
df

Unnamed: 0,title
0,Hess 3: Twelve Ecossaise for piano or orchestra
1,Hess 11: Romance No. 3 for violin & orchestra ...
2,Hess 14: Fragment of original version of Piano...
3,Hess 16: Original introduction to the Choral F...
4,Hess 25: String Trio (1793)
...,...
81,Hess 331: Minuet for piano in B flat major
82,Hess 332: Pastorella for String Quartet in D m...
83,Hess 333: Minuet-Scherzo for String Quartet in...
84,Hess 334: Draft for String Quartet in A major ...


## Extract Hess number

In [135]:
def Hess_number(text):
    """Return the Hess catalogue number found in ``text`` as a digit string.

    Searches ``text`` for a ``"Hess <digits>: "`` marker. When one is found,
    every row of the module-level ``df`` whose ``title`` equals ``text`` has
    that marker removed from its title (side effect), and the digits are
    returned.

    Parameters
    ----------
    text : str
        A work title, e.g. ``"Hess 25: String Trio (1793)"``.

    Returns
    -------
    str or float
        The digits of the Hess number, or ``np.nan`` when ``text`` is not a
        string or contains no marker.
    """
    # Missing titles (NaN/None) cannot be searched; treat them as "no number"
    # instead of letting re.search raise a TypeError.
    if not isinstance(text, str):
        return np.nan

    # Raw string avoids the invalid-escape warning; the capture group yields
    # the digits directly, so no second search is needed.
    match = re.search(r'\bHess\b (\d+)\: ', text)
    if match is None:
        return np.nan

    # NOTE: this writes to the global frame while its 'title' column is being
    # mapped with Series.apply; the call site relies on it to clean the titles.
    df.loc[df['title'] == text, ['title']] = text.replace(match.group(0), '')
    return match.group(1)

In [136]:
# Pull the Hess number out of every title (Hess_number also strips the prefix).
df['Hess'] = df['title'].apply(Hess_number)

In [137]:
# Sanity check: list the rows for which no Hess number was extracted.
df.loc[df['Hess'].isna()]

Unnamed: 0,title,Hess


## Extract year

In [138]:
def year(text):
    """Return the parenthesised number found in ``text`` as a digit string.

    Searches ``text`` for ``"(<digits>)"``. When one is found, every row of
    the module-level ``df`` whose ``title`` equals ``text`` has that
    parenthesised group removed from its title (side effect), and the digits
    are returned.

    Parameters
    ----------
    text : str
        A work title, e.g. ``"String Trio (1793)"``.

    Returns
    -------
    str or float
        The digits inside the parentheses, or ``np.nan`` when ``text`` is not
        a string or contains no such group (e.g. ``"(lost)"``).
    """
    # Missing titles (NaN/None) cannot be searched; report "no year" rather
    # than raising a TypeError from re.search.
    if not isinstance(text, str):
        return np.nan

    # Raw string avoids the invalid-escape warning; the capture group returns
    # the digits without a second search.
    match = re.search(r'\((\d+)\)', text)
    if match is None:
        return np.nan

    # NOTE: this writes to the global frame while its 'title' column is being
    # mapped with Series.apply; the call site relies on it to clean the titles.
    df.loc[df['title'] == text, ['title']] = text.replace(match.group(0), '')
    return match.group(1)

In [139]:
# Pull the parenthesised year out of every title (year also strips it).
df['year'] = df['title'].apply(year)

In [140]:
# Inspect the frame after the Hess number and year have been split off.
df

Unnamed: 0,title,Hess,year
0,Twelve Ecossaise for piano or orchestra,3,
1,Romance No. 3 for violin & orchestra,11,1816
2,Fragment of original version of Piano Concerto...,14,1794
3,Original introduction to the Choral Fantasy,16,1808
4,String Trio,25,1793
...,...,...,...
81,Minuet for piano in B flat major,331,
82,Pastorella for String Quartet in D major,332,1799
83,Minuet-Scherzo for String Quartet in A major,333,1799
84,Draft for String Quartet in A major,334,1799


In [141]:
# List the works whose title carried no parenthesised year.
df.loc[df['year'].isna()]

Unnamed: 0,title,Hess,year
0,Twelve Ecossaise for piano or orchestra,3,
10,Mozart fugue arranged for piano four hands,37,
12,String Quintet in F major (lost),39,
17,Piano Sonata in C major,52,
18,Piano variations on Freudvoll und Liedvoll,54,
38,Rondo for piano,84,
39,Piano cadenza for Op. 61a,85,
46,"Fragment for Solo Voice(s): ""Ritterblatt""",116,
51,Sketches for canons,299,
52,Sketches for canons,300,


## Extract key

In [142]:
def key(text):
    """Return the key letter mentioned in ``text``.

    A key letter is a single capital ``C, D, E, F, G, A, B`` or ``H`` that is
    surrounded by spaces, as in ``"... in B flat major"``.

    Parameters
    ----------
    text : str
        A work title.

    Returns
    -------
    str or float
        The bare letter (no surrounding whitespace), or ``np.nan`` when
        ``text`` is not a string or contains no such letter.
    """
    # Missing titles (NaN/None) cannot be searched.
    if not isinstance(text, str):
        return np.nan

    match = re.search(r' (C|D|E|F|G|A|B|H) ', text)
    if match is None:
        return np.nan

    # Return only the captured letter. Slicing the whole match with [:-1]
    # dropped the trailing space but kept the leading one (' B').
    return match.group(1)

In [143]:
# Detect the key letter (if any) in every title.
df['key'] = df['title'].apply(key)

In [144]:
# Inspect the frame with the newly added 'key' column.
df

Unnamed: 0,title,Hess,year,key
0,Twelve Ecossaise for piano or orchestra,3,,
1,Romance No. 3 for violin & orchestra,11,1816,
2,Fragment of original version of Piano Concerto...,14,1794,
3,Original introduction to the Choral Fantasy,16,1808,
4,String Trio,25,1793,
...,...,...,...,...
81,Minuet for piano in B flat major,331,,B
82,Pastorella for String Quartet in D major,332,1799,D
83,Minuet-Scherzo for String Quartet in A major,333,1799,A
84,Draft for String Quartet in A major,334,1799,A


In [145]:
def key_sign(text, key):
    """Append the accidental found in ``text`` to ``key``.

    Parameters
    ----------
    text : str
        A work title, searched for the word ``flat`` or ``sharp``.
    key : str or float
        The key found so far, or NaN when the title has no key.

    Returns
    -------
    str or float
        ``"<key> flat"`` / ``"<key> sharp"`` when ``text`` contains one of
        those words, otherwise ``key`` unchanged. A non-string ``key`` (NaN)
        is always returned unchanged.
    """
    # Without a key there is nothing to extend; concatenating NaN with a
    # string would raise TypeError when the title happens to contain
    # "flat" or "sharp".
    if not isinstance(key, str):
        return key

    match = re.search('(flat|sharp)', text)
    if match is None:
        return key
    return key + ' ' + match.group(0)

In [146]:
# Extend each key with the "flat"/"sharp" word found in its title, row by row.
df['key'] = df.apply(lambda row: key_sign(row['title'], row['key']), axis=1)

In [147]:
def key_major_minor(text, key):
    """Append the mode found in ``text`` to ``key``.

    Parameters
    ----------
    text : str
        A work title, searched for the word ``major`` or ``minor``.
    key : str or float
        The key found so far, or NaN when the title has no key.

    Returns
    -------
    str or float
        ``"<key> major"`` / ``"<key> minor"`` when ``text`` contains one of
        those words, otherwise ``key`` unchanged. A non-string ``key`` (NaN)
        is always returned unchanged.
    """
    # Without a key there is nothing to extend; concatenating NaN with a
    # string would raise TypeError when the title happens to contain
    # "major" or "minor".
    if not isinstance(key, str):
        return key

    match = re.search(r'(major|minor)', text)
    if match is None:
        return key
    return key + ' ' + match.group(0)

In [148]:
# Extend each key with the "major"/"minor" word found in its title, row by row.
df['key'] = df.apply(lambda row: key_major_minor(row['title'], row['key']), axis=1)

In [149]:
# df['title']=df.apply(lambda x: x['title'].replace(x['key'],"") if x['key'] is not np.nan else x['title'],axis=1)

In [150]:
# def delete_in_and_for(text):
#     arr = text.split(' ')
#     lst = arr.pop()
#     while lst not in ['in','for']:
#         if len(arr)==0:
#             return text
        
#         lst =arr.pop()
        
#     return ' '.join(arr)

In [151]:
# df.loc[df['key'].isna()==False,['title']]= df[df['key'].isna()==False].apply(lambda x:delete_in_and_for(x['title']),axis=1)

In [152]:
# Show only the works for which a key was detected.
df[df['key'].notna()]

Unnamed: 0,title,Hess,year,key
5,Movement in A flat major for string trio,28,1797.0,A flat major
6,String Quartet in F major,32,1799.0,F major
12,String Quintet in F major (lost),39,,F major
13,Violin Sonata in A major (fragment),46,1790.0,A major
14,Allegro con brio in E flat major for piano trio,47,1800.0,E flat major
15,Piano Trio in E flat major,49,1786.0,E flat major
16,Piano Trio in B flat major,50,1786.0,B flat major
17,Piano Sonata in C major,52,,C major
19,Bagatelle in C major,57,1824.0,C major
20,Piano Exercise in B flat major,58,1800.0,B flat major


## Create columns to join with the other table

In [153]:
# Add empty (all-NaN) 'WoO' and 'Biamonti' columns.
# NOTE(review): presumably these align the schema with the table this frame
# is joined with later — confirm against that notebook.
df['WoO'] = np.nan

df['Biamonti'] = np.nan

In [154]:

# Build the full catalogue label (e.g. 'Hess 3') from the extracted number.
df['name'] = 'Hess ' + df['Hess']

# Display the final frame before it is saved.
df

Unnamed: 0,title,Hess,year,key,WoO,Biamonti,name
0,Twelve Ecossaise for piano or orchestra,3,,,,,Hess 3
1,Romance No. 3 for violin & orchestra,11,1816,,,,Hess 11
2,Fragment of original version of Piano Concerto...,14,1794,,,,Hess 14
3,Original introduction to the Choral Fantasy,16,1808,,,,Hess 16
4,String Trio,25,1793,,,,Hess 25
...,...,...,...,...,...,...,...
81,Minuet for piano in B flat major,331,,B flat major,,,Hess 331
82,Pastorella for String Quartet in D major,332,1799,D major,,,Hess 332
83,Minuet-Scherzo for String Quartet in A major,333,1799,A major,,,Hess 333
84,Draft for String Quartet in A major,334,1799,A major,,,Hess 334


## Save

In [155]:
# Write the enriched table to disk; index=False leaves the pandas index out.
df.to_csv('df_fv_Hess_3_335.csv',index=False)