In [None]:
import altair as alt
import json
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import squarify

In [None]:
# Select Altair's renderer registered under the name 'notebook' for chart display.
# NOTE(review): presumably targets the classic Jupyter Notebook front end — confirm
# this matches the environment the notebook is run in.
alt.renderers.enable('notebook')

In [None]:
import wrangler
import utils

In [None]:
# Load the munged receipt CSV and hand it to the project wrangler; every plotting
# function below reads the resulting module-level `df`.
filename = '01._Receipt_-_DDOReceipt_Head_Date_and_Challan_Wise.csv'
filepath = utils.get_munged_filepath(filename)
# NOTE(review): per the pandas docs, parse_dates=True only tries to parse the index;
# with no index_col given it likely has no effect here — confirm that the wrangler
# is what actually produces the datetime 'Date' data used by the resample() calls.
df = pd.read_csv(filepath, parse_dates=True)
# NOTE(review): presumably derives the columns used below ('District', 'Net Receipt',
# 'Major Head', ...) — verify against wrangler.wrangle_data_for_receipt.
df = wrangler.wrangle_data_for_receipt(df, ['RECEIPTHEAD'])

In [None]:
def plot_top_10_ddo_for_district(district):
    '''
    Plot the ten DDOs with the largest Net Receipt for the asked district as a bar chart.

    input - district:'SHIMLA'
    '''
    # total the columns per (District, DDO Desc) pair and discard rows containing NaN.
    ddo_totals = df.groupby(['District', 'DDO Desc']).sum().dropna()

    # narrow down to the asked district and keep its ten largest rows by Net Receipt.
    top_ddos = ddo_totals.loc[district].nlargest(10, 'Net Receipt')

    amount_axis = alt.X(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupees)', format='.2s'),
    )
    # bars are ordered by their summed Net Receipt, largest first.
    ddo_axis = alt.Y(
        'DDO Desc:N',
        axis=alt.Axis(title='DDO Names'),
        sort=alt.EncodingSortField(field='Net Receipt', order='descending', op='sum'),
    )
    # colour each DDO using seaborn's default palette.
    ddo_color = alt.Color(
        'DDO Desc',
        scale=alt.Scale(range=sns.color_palette().as_hex()),
    )

    bars = alt.Chart(top_ddos.reset_index()).mark_bar(point=True)
    encoded = bars.encode(
        x=amount_axis,
        y=ddo_axis,
        color=ddo_color,
        tooltip=['DDO Desc', alt.Text('Net Receipt', format='.3s')],
    )
    return encoded.properties(
        title='DDO wise Net Receipt for {}'.format(district),
        width=520,
        height=280,
    )

In [None]:
def plot_top_10_receipt_heads_for_district(district):
    '''
    Plot the ten Receipt Heads with the largest Net Receipt for the asked district as a bar chart.

    input - district:'SHIMLA'
    '''
    # total the columns per (District, Receipt Head), drop rows containing NaN,
    # then keep the asked district's ten largest rows by Net Receipt.
    data = df.groupby(['District', 'Receipt Head']).sum().dropna().loc[district].nlargest(10, 'Net Receipt')

    # Title and size are set once, in .properties(). Earlier the Chart constructor
    # also received a different title/width/height that .properties() overrode,
    # so those values never reached the rendered chart.
    return alt.Chart(data.reset_index()).mark_bar(point=True).encode(

        x=alt.X('Net Receipt:Q',
                axis=alt.Axis(title='Amount (in Rupees)', format='.2s')),

        # bars are ordered by their summed Net Receipt, largest first.
        y=alt.Y('Receipt Head:N',
                axis=alt.Axis(title='Receipt Heads'),
                sort=alt.EncodingSortField(field='Net Receipt', order='descending', op='sum')),

        color=alt.Color('Receipt Head', scale=alt.Scale(range=sns.color_palette().as_hex())),
        tooltip=['Receipt Head:N', alt.Text('Net Receipt:Q', format='.3s')]
    ).properties(
        title='Receipt Head wise Net Receipt for {}'.format(district),
        width=520,
        height=280
    )

In [None]:
## Vega Lite version of the plot in the next cell.
# data = df.groupby('Major Head').resample('M').sum()['Net Receipt']
# 
# VegaLite({
#   "$schema": "https://vega.github.io/schema/vega-lite/v2.json",
    
#   "title": "Major Head wise timeline of receipt",
#   "width": 800,
#   "height": 350,
    
#   "data": {'values': json.loads(data.reset_index().to_json(orient='records'))},
    
#   "mark": {"type": "area", "line": True, "point": True},
#   "encoding": {
#     "x": {
#         "field": "Date", "type": "temporal",
#         "axis": {"format": "%b %Y"}
#     },
#     "y": {
#         "field": "Net Receipt", "type": "quantitative", "aggregate": "sum",
#         "axis": {"format": ".2s"}
#     },

#     "tooltip": [
#         {"field": "Major Head", "type": "nominal"},
#         {"field": "Net Receipt", "type": "quantitative", "aggregate": "sum", "format": ".3s"}
#     ],
#     "color": {
#       "field": "Major Head",
#       "type": "nominal",
#       "scale": {"scheme": "category20"},
#       "legend": None
#     }
#   }
# })

In [None]:
def plot_timeseries_major_head_distribution():
    '''
    Plot a monthly area chart of Net Receipt, one coloured area per Major Head.
    '''
    # monthly totals within every Major Head; only the Net Receipt column is charted.
    # NOTE(review): resample('M') presumably relies on `df` having a datetime index
    # named 'Date' — confirm against the wrangler.
    monthly_totals = df.groupby('Major Head').resample('M').sum()
    net_receipt = monthly_totals['Net Receipt']

    timeline_axis = alt.X(
        'Date:T',
        axis=alt.Axis(title='Timeline (Monthly)', format='%b %Y'),
    )
    amount_axis = alt.Y(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupees)', format='.2s'),
    )
    # the legend is switched off for this colour channel.
    head_color = alt.Color(
        'Major Head',
        legend=None,
        scale=alt.Scale(scheme="category20"),
    )

    areas = alt.Chart(net_receipt.reset_index()).mark_area(line=True, point=True)
    encoded = areas.encode(
        x=timeline_axis,
        y=amount_axis,
        color=head_color,
        tooltip=['Major Head', alt.Text('Net Receipt', format='.3s')],
    )
    sized = encoded.properties(
        title='Major Head wise Net Receipt Timeline',
        width=800,
    )
    return sized.interactive()

In [None]:
def plot_top_10_receipt_heads():
    '''
    Plot the ten Receipt Heads with the largest overall Net Receipt as a bar chart.
    '''
    # total the columns per Receipt Head (kept as a regular column) and take the ten largest.
    head_totals = df.groupby('Receipt Head', as_index=False).sum()
    top_heads = head_totals.nlargest(10, 'Net Receipt')

    # one ordering shared by the y axis and the colour scale: summed Net Receipt, largest first.
    by_amount = alt.EncodingSortField(field='Net Receipt', op='sum', order='descending')

    amount_axis = alt.X(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupee)', format='.2s'),
    )
    head_axis = alt.Y('Receipt Head:N', sort=by_amount)
    # colours come from a 20-step reversed 'Blues' palette.
    head_color = alt.Color(
        'Receipt Head',
        scale=alt.Scale(range=sns.color_palette('Blues_r', 20).as_hex()),
        sort=by_amount,
    )

    bars = alt.Chart(top_heads.reset_index()).mark_bar(point=True)
    encoded = bars.encode(
        x=amount_axis,
        y=head_axis,
        color=head_color,
        tooltip=['Receipt Head', alt.Text('Net Receipt', format='.3s')],
    )
    return encoded.properties(
        title='Top Receipt Head wise Net Receipt',
        width=520,
        height=300,
    )

In [None]:
def plot_major_head_groups_district_comparison():
    '''
    Plot one bar chart column per district showing its Receipts split by group of major heads,
    for example to see how much money comes from Tax Revenue, Non-Tax Revenue, debts etc.
    '''
    # each group is an inclusive (first, last) range of major head codes.
    head_groups = {
        'Tax Revenue': ('0029', '0045'),
        'Non Tax Revenue': ('0049', '1475'),
        'Misc Capital Receipt': ('4000', '4000'),
        'Capital Accounts of Economic Services': ('4401', '4401'),
        'Public Debt': ('6003', '6003'),
        'Loans': ('6202', '7610'),
        'Public Account': ('8009', '8782'),
    }

    totals_by_district_code = df.groupby(['District', 'Maj Code']).sum()

    # one record per (district, group): the Net Receipt summed over the group's code range.
    records = []
    for district in utils.DISTRICTS.values():
        district_totals = totals_by_district_code.loc[district]
        for group_name, (first_code, last_code) in head_groups.items():
            group_amount = district_totals.loc[first_code:last_code, 'Net Receipt'].sum()
            records.append({
                'District': district,
                'Major Head Group': group_name,
                'Net Receipt': group_amount,
            })

    chart_data = pd.DataFrame(records, columns=['District', 'Major Head Group', 'Net Receipt'])

    group_colors = ['coral pink', 'fresh green', 'windows blue', 'orange yellow', 'iris', 'orangish', 'seaweed']

    # districts are laid out as columns, ordered by their summed Net Receipt, largest first.
    district_column = alt.Column(
        'District:O',
        sort=alt.EncodingSortField(field='Net Receipt', op='sum', order='descending'),
    )
    amount_axis = alt.Y(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupees)', format='s'),
    )
    group_color = alt.Color(
        'Major Head Group',
        scale=alt.Scale(range=sns.xkcd_palette(group_colors).as_hex()),
    )

    bars = alt.Chart(chart_data, width=50, height=300).mark_bar()
    encoded = bars.encode(
        column=district_column,
        y=amount_axis,
        color=group_color,
        tooltip=['Major Head Group', alt.Text('Net Receipt', format='.3s')],
    )
    return encoded.properties(
        title='District wise Net Receipt Composition',
    ).interactive()

In [None]:
def plot_treemap():
    '''
    Plot a treemap distribution for group of major heads,
    for example to see how much money comes from Tax Revenue, Non-Tax Revenue, debts etc.

    Draws with matplotlib/squarify and shows the figure; nothing is returned.
    '''
    # each group is an inclusive (first, last) range of major head codes.
    bins = {'Tax Revenue': ('0029', '0045'), 'Non Tax Revenue': ('0049', '1475'),
        'Misc Capital Receipt': ('4000', '4000'), 'Capital Accounts of Economic Services': ('4401', '4401'),
        'Public Debt': ('6003', '6003'), 'Loans': ('6202', '7610'), 'Public Account': ('8009', '8782')}

    # Net Receipt per major head code, then summed over every group's code range.
    maj_grouped = df.groupby('Maj Code')['Net Receipt'].sum()
    mapping = {head: maj_grouped.loc[codes[0]: codes[1]].sum() for head, codes in bins.items()}

    # materialise the dict view into a list of (group, amount) rows before handing it to pandas.
    data = pd.DataFrame(list(mapping.items()), columns=['Major Head Group', 'Net Receipt'])
    data = data.sort_values(by='Net Receipt')

    colors = ['coral pink', 'fresh green', 'orange yellow', 'dusky purple',
          'iris', 'orangish', 'seaweed']

    # Size the figure BEFORE drawing. Previously rcParams['figure.figsize'] was
    # changed after the plot already existed, so this treemap kept the default
    # size and the setting only leaked into figures created later.
    plt.figure(figsize=(14, 9))
    squarify.plot(sizes=data['Net Receipt'], label=data['Major Head Group'],
                  color=sns.xkcd_palette(colors).as_hex(), alpha=.8)
    plt.title('Major Head groups composition for overall receipts of Himachal Pradesh', fontsize=16, fontweight='bold')
    plt.axis('off')
    plt.show()

In [None]:
def plot_district_ranking():
    '''
    Plot a bar chart of the total Net Receipt of every district to see where each district stands.
    '''
    # total the columns per district and discard rows containing NaN.
    district_totals = df.groupby('District').sum().dropna()

    amount_axis = alt.X(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupees)', format='s'),
    )
    # districts are ordered by their summed Net Receipt, largest first.
    district_axis = alt.Y(
        'District:O',
        sort=alt.EncodingSortField(field='Net Receipt', op='sum', order='descending'),
        axis=alt.Axis(title='Districts'),
    )

    bars = alt.Chart(district_totals.reset_index(), width=600, height=300).mark_bar()
    encoded = bars.encode(
        x=amount_axis,
        y=district_axis,
        color='District',
        tooltip=[alt.Text('Net Receipt', format='.3s')],
    )
    return encoded.properties(
        title='District wise Net Receipt',
    )

In [None]:
def plot_time_series_for_district(district):
    '''
    Plot the monthly Net Receipt of a district as a line chart with hover labels.
    For example we could see how Net Receipt amount was distributed from April 2017 to August 2018 for Shimla.

    input - district:'SHIMLA'
    '''
    # total the columns per (District, Date), then roll the asked district up into monthly sums.
    totals_by_district_date = df.groupby(['District', 'Date']).sum()
    monthly = totals_by_district_date.loc[district].resample('M').sum()

    # single selection driven by mouseover that snaps to the nearest mark.
    hovered = alt.selection_single(on='mouseover', nearest=True, empty='none')

    timeline_axis = alt.X(
        'Date:T',
        axis=alt.Axis(title='Receipt Timeline (Monthly)', format='%b %Y'),
    )
    amount_axis = alt.Y(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupees)', format='s'),
    )

    # layer 1: the line itself; its encodings are reused by the point and text layers.
    line = alt.Chart().mark_line().encode(x=timeline_axis, y=amount_axis)

    # layer 2: fully transparent points along the dates that carry the selection.
    selectors = alt.Chart().mark_point().encode(
        x='Date:T',
        opacity=alt.value(0),
    ).add_selection(hovered)

    # layer 3: points on the line, opaque only for the selected position.
    points = line.mark_point().encode(
        opacity=alt.condition(hovered, alt.value(1), alt.value(0)),
    )

    # layer 4: the formatted amount next to the selected point, a blank otherwise.
    text = line.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(hovered, 'Net Receipt:Q', alt.value(' '), format='.3s'),
    )

    # stack the four layers and bind the monthly data to all of them.
    return alt.layer(
        line, selectors, points, text,
        data=monthly.reset_index(),
        width=700, height=350,
        title='Monthly Receipt: {}'.format(district),
    )

In [None]:
def plot_top_10_major_heads_for_district(district):
    '''
    Plot the ten Major Heads with the largest Net Receipt for the asked district.

    input - district:'SHIMLA'
    '''
    # total the columns per (District, Major Head) and discard rows containing NaN.
    totals = df.groupby(['District', 'Major Head']).sum().dropna()

    # take the asked district and skip its first row.
    # NOTE(review): presumably the empty Major Head row sorts first — confirm.
    district_heads = totals.loc[district].iloc[1:]

    # largest Net Receipt first, top ten only.
    top_heads = district_heads.sort_values(by='Net Receipt', ascending=False).head(10)

    amount_axis = alt.X(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupee)', format='s', grid=False),
    )
    # bars are ordered by their summed Net Receipt, largest first.
    head_axis = alt.Y(
        'Major Head:O',
        sort=alt.EncodingSortField(field='Net Receipt', op='sum', order='descending'),
        axis=alt.Axis(title='Major Heads'),
    )

    bars = alt.Chart(
        data=top_heads.reset_index(),
        width=500, height=300,
        title='Top 10 Major Heads for {}'.format(district),
    ).mark_bar()

    return bars.encode(
        x=amount_axis,
        y=head_axis,
        color='Major Head',
        tooltip=['Major Head:O', alt.Text('Net Receipt:Q', format='.3s')],
    )

In [None]:
def plot_major_head_wise_receipt_for_district(major_heads, district):
    '''
    Plot the Net Receipt of the asked Major Heads for the asked district.

    input - major_heads: :list of major heads:, district:'SHIMLA'
    '''
    # total the columns per (District, Major Head) and discard rows containing NaN.
    totals = df.groupby(['District', 'Major Head']).sum().dropna()

    # take the asked district and skip its first row.
    # NOTE(review): presumably the empty Major Head row sorts first — confirm.
    district_heads = totals.loc[district].iloc[1:]

    # keep only the rows labelled with the asked major heads.
    chosen_heads = district_heads.loc[major_heads]

    # bars are ordered by their summed Net Receipt, largest first.
    by_amount = alt.EncodingSortField(field='Net Receipt', op='sum', order='descending')

    amount_axis = alt.X(
        'Net Receipt:Q',
        axis=alt.Axis(title='Amount (in Rupee)', format='s', grid=False),
    )
    head_axis = alt.Y(
        'Major Head:O',
        sort=by_amount,
        axis=alt.Axis(title='Major Heads'),
    )

    bars = alt.Chart(
        data=chosen_heads.reset_index(),
        width=500, height=300,
        title='Major Head wise Receipt for {}'.format(district),
    ).mark_bar()

    return bars.encode(
        x=amount_axis,
        y=head_axis,
        color='Major Head',
        tooltip=['Major Head:O', alt.Text('Net Receipt:Q', format='.3s')],
    )

In [None]:
def plot_major_head_wise_receipt():
    '''
    Plot Major head wise receipt across the whole dataset.

    Takes no input; returns an interactive bar chart of Net Receipt per Major Head.
    '''
    # total the columns per major head and discard rows containing NaN.
    data = df.groupby('Major Head').sum().dropna()

    return alt.Chart(

        data=data.reset_index(),
        width=500, height=500,
        # spelling corrected from 'Himchal' to match the treemap title.
        title='Major Head wise Receipt in Himachal Pradesh'

    ).mark_bar().encode(

        x=alt.X('Net Receipt:Q',
                axis=alt.Axis(title='Amount (in Rupee)', format='s', grid=False)),

        # bars are ordered by their summed Net Receipt, largest first.
        y=alt.Y('Major Head:O',
                sort=alt.EncodingSortField(field='Net Receipt', op='sum', order='descending'),
                axis=alt.Axis(title='Major Heads')),

        # the legend is switched off for this colour channel.
        color=alt.Color('Major Head', legend=None),

        tooltip=['Major Head:O', alt.Text('Net Receipt:Q', format='.3s')]

    ).interactive()