In [93]:
import os

import quantipy as qp
import quantipy.core.builds.powerpoint.helpers as hp
from pptx import Presentation
from pptx.enum.chart import XL_CHART_TYPE
from pptx.util import Pt

In [94]:
%%capture 
# Parameters
filename = "survey_results_public"

dataset = qp.DataSet("Stackoverflow 2019")
dataset.read_quantipy(path_meta="./data/interim/{}.json".format(filename), 
                       path_data="./data/interim/{}.csv".format(filename))

In [95]:
# Survey questions to chart. Every technology area comes as a pair of
# questions: what respondents worked with, and what they want to use next year.
# The resulting order is area by area, "WorkedWith" before "DesireNextYear".
variables = [
    area + suffix
    for area in ('Language', 'Database', 'Platform', 'WebFrame', 'MiscTech')
    for suffix in ('WorkedWith', 'DesireNextYear')
]


In [193]:
# Sanity check: the country shares computed here match the officially
# published survey results.
country_column = ('Country. In which country do you currently reside?', '@')

# Percentage crosstab of the Country question, without the "All" row(s)
# found on the second index level.
countries_pct = dataset.crosstab('Country', pct=True)
countries_pct = countries_pct.drop("All", axis=0, level=1)

# The second index level holds the country labels; later cells iterate over it.
all_countries = countries_pct.index.get_level_values(1)

# Last expression of the cell: show the countries with the largest share.
countries_pct.sort_values(by=country_column, ascending=False).head()

Unnamed: 0_level_0,Question,Country. In which country do you currently reside?
Unnamed: 0_level_1,Values,@
Question,Values,Unnamed: 2_level_2
Country. In which country do you currently reside?,United States,23.6
Country. In which country do you currently reside?,India,10.2
Country. In which country do you currently reside?,Germany,6.6
Country. In which country do you currently reside?,United Kingdom,6.5
Country. In which country do you currently reside?,Canada,3.8
Country. In which country do you currently reside?,France,2.7
Country. In which country do you currently reside?,Brazil,2.2
Country. In which country do you currently reside?,Poland,2.2
Country. In which country do you currently reside?,Australia,2.1
Country. In which country do you currently reside?,Netherlands,2.1


In [192]:
# Build, for every country label, the list of percentage crosstabs -- one per
# entry in `variables`, in the same order -- computed on the data filtered to
# that country.
countries = {}
for country in all_countries:
    # Convert the country name to its code once per country (it does not
    # depend on the variable) and use it to filter the data before running the
    # crosstabs. Named `country_filter` so the built-in `filter` is not
    # shadowed for the rest of the kernel session.
    country_filter = {'Country': dataset.code_from_label('Country', country)}
    countries[country] = [
        dataset.crosstab(variable, pct=True, f=country_filter)
        for variable in variables
    ]

In [189]:
# Render one PowerPoint deck per country: one slide per crosstab stored in
# countries[country], each with a clustered column chart and a title text box.

# Create the output folder up front so prs.save() does not fail with a
# missing-directory error on a fresh checkout.
output_dir = "./results/stackoverflow_results"
os.makedirs(output_dir, exist_ok=True)

# Chart position (x, y) and size (cx, cy). These do not change per slide, so
# they are defined once. Plain integers are read by python-pptx as EMU.
x, y, cx, cy = 1524000, 1397000, 6096000, 4064000

for country in all_countries:
    prs = Presentation('./specs/pptx-template.pptx')

    # Iterate the stored tables directly instead of indexing them through
    # enumerate(variables); the slide order is the order they were computed in.
    for table in countries[country]:
        # First level of the first row label; used as the slide title.
        title = table.index[0][0]
        # Strip the outer row/column index levels, drop the "All" row, sort by
        # the '@' column (largest first) and rescale percentages to fractions.
        data = table.droplevel(axis=0, level=0).droplevel(axis=1, level=0)
        data = data.drop('All', axis=0).sort_values(by='@', ascending=False) / 100
        chartData = hp.ChartData_from_DataFrame(data)

        slide = prs.slides.add_slide(prs.slide_layouts[6])  # blank slide
        chart = slide.shapes.add_chart(
            XL_CHART_TYPE.COLUMN_CLUSTERED,
            x, y, cx, cy,
            chartData
        ).chart

        # Title box starts 1000000 units above the chart's top edge.
        txBox = slide.shapes.add_textbox(x, y - 1000000, cx, cy)
        txBox.text_frame.text = title

        chart.category_axis.tick_labels.font.size = Pt(12)
        chart.value_axis.tick_labels.font.size = Pt(10)

    prs.save("{}/{}.pptx".format(output_dir, country))