In [1]:
import pandas as pd
from importlib import reload
from nlg import templates as tmpl
df = pd.read_csv('data/assembly.csv')
df['Vote share (%)'] = df.pop('Vote share').str.rstrip('%').astype(float)
df.head()

Unnamed: 0,Year,State,AC,Party,Vote share (%)
0,2013,Rajasthan,Jaisalmer,BJP,24.0
1,2013,Rajasthan,Jaipur,BJP,22.0
2,2013,Rajasthan,Jodhpur,BJP,20.0


In [3]:
# BJP won the largest voteshare in Jaisalmer
struct = {
    'intent': 'extreme',
    'data': df,
    'metadata': {
        'subject': 'BJP', # literal subject
        'verb': ['won', 'scored', 'achieved'],
        'adjective': ['highest', 'greatest', 'most', 'largest'],
        'object': {
            'template': 'vote share of {value} in {location}',
            'kwargs': {
                'location': {
                    '_type': 'cell',
                    'colname': 'AC',
                    '_filter': {'colname': 'Vote share (%)', 'filter': 'max'}
                },
                'value': {
                    '_type': 'cell',
                    'colname': 'Vote share (%)',
                    '_filter': 'max'
                }
            }
        }
    }
}

In [4]:
tmpl.Narrative(struct).render()

'BJP won the largest vote share of 24.0 in Jaisalmer.'

In [5]:
df = pd.DataFrame.from_dict({
    'singer': ['Kishore', 'Kishore', 'Kishore'],
    'partner': ['Lata', 'Asha', 'Rafi'],
    'n_songs': [20, 5, 15]
})
df.head()

Unnamed: 0,singer,partner,n_songs
0,Kishore,Lata,20
1,Kishore,Asha,5
2,Kishore,Rafi,15


In [6]:
# Kishore sang the most duets with Lata
struct = {
    'intent': 'extreme',
    'data': df,
    'metadata': {
        'subject': {  # inferred subject
            '_type': 'cell',
            'colname': 'singer',
            '_filter': 'mode'
        },
        'verb': 'sang',
        'adjective': 'most',
        'object': {
            'template': 'duets with {partner}',
            'kwargs': {
                'partner': {
                    '_type': 'cell',
                    'colname': 'partner',
                    '_filter': {'colname': 'n_songs', 'filter': 'max'}
                }
            }
        }
    }
}

In [7]:
tmpl.Narrative(struct).render()

'Kishore sang the most duets with Lata.'

In [8]:
df = pd.DataFrame.from_dict({
    'character': ['Eddard Stark', 'Jon Snow'],
    'n_episodes': [10, 56],
    'time_per_episode': [6.2, 5.5]
})
df.head()

Unnamed: 0,character,n_episodes,time_per_episode
0,Eddard Stark,10,6.2
1,Jon Snow,56,5.5


In [9]:
# Ned Stark's screen time is 0.7 minutes greater than that of Jon Snow.
struct = {
    'intent': 'comparison',
    'data': df,
    'metadata': {
        'subject': {
            'template': '{character}\'s screen time per episode',
            'kwargs': {
                'character': {
                    '_type': 'cell',
                    'colname': 'character',
                    '_filter': {'colname': 'time_per_episode', 'filter': 'max'}
                }
            }            
        },
        'verb': 'is',
        'quant': {
            'template': '{q} minutes',
            'kwargs': {
                'q': {
                    '_type': 'operation',
                    'expr': '{data.iloc[0].time_per_episode} - {data.iloc[1].time_per_episode}'
                }
            }
        },
        'adjective': 'more',
        'object': {
            'template': 'that of {character}',
            'kwargs': {
                'character': {
                    '_type': 'cell',
                    'colname': 'character',
                    '_filter': {'colname': 'time_per_episode', 'filter': 'min'}
                }
            }
        }
    }
}

In [10]:
tmpl.Narrative(struct).render()

"Eddard Stark's screen time per episode is 0.7000000000000002 minutes more than that of Jon Snow."

In [15]:
df = pd.read_csv('data/assembly.csv')
df['vote_share'] = df.pop('Vote share').str.rstrip('%').astype(float)
df.head()

Unnamed: 0,Year,State,AC,Party,vote_share
0,2013,Rajasthan,Jaisalmer,BJP,24.0
1,2013,Rajasthan,Jaipur,BJP,22.0
2,2013,Rajasthan,Jodhpur,BJP,20.0


In [16]:
# BJP voteshare is 4% higher in Jaisalmer than in Jodhpur
struct = {
    'intent': 'comparison',
    'data': df,
    'metadata': {
        'subject': {
            'template': '{party}\'s voteshare',
            'kwargs': {
                'party': {
                    '_type': 'cell',
                    'colname': 'Party',
                    '_filter': 'mode'
                }
            }
        },
        'verb': 'is',
        'quant': {
            'template': '{q}%',
            'kwargs': {
                'q': {
                    '_type': 'operation',
                    'expr': '{data.iloc[0].vote_share}-{data.iloc[2].vote_share}'
                }
            }
        },
        'adjective': ['higher', 'greater', 'more'],
        'object': {
            'template': 'in {x} than in {y}',
            'kwargs': {
                'x': {
                    '_type': 'cell',
                    'colname': 'AC',
                    '_filter': {'colname': 'vote_share', 'filter': 'max'}
                },
                'y': {
                    '_type': 'cell',
                    'colname': 'AC',
                    '_filter': {'colname': 'vote_share', 'filter': 'min'}
                }
            }
        }
    }
}

In [14]:
tmpl.Narrative(struct).render()

"BJP's voteshare is 4.0% more than in Jaisalmer than in Jodhpur."