# Countries report

### Imports

In [1]:
import sys
import psycopg2
import pandas as pd

### Initial setup

In [2]:
conn = psycopg2.connect(host='localhost',
                        port='5432',
                        user='mffais',
                        password='pass',
                        database='bigquery')

### Calculate installs

In [3]:
cursor = conn.cursor()
query = '''
    SELECT country,
           COUNT(*) AS install
    FROM (
          SELECT user_pseudo_id,
                 geo ->> 'country' AS country
          FROM events
          WHERE geo ->> 'country' <> ''
          GROUP BY user_pseudo_id, geo ->> 'country'
    ) AS installs_table
    GROUP BY country
    ORDER BY country
'''
cursor.execute(query)
install = pd.read_sql(query, con=conn)
cursor.close()
install.rename(columns={ 'country':'Country', 'install':'Install' }, inplace=True)
install.head(20)

Unnamed: 0,Country,Install
0,Afghanistan,32
1,Albania,17
2,Algeria,370
3,Angola,28
4,Anguilla,3
5,Argentina,26
6,Armenia,6
7,Australia,3
8,Austria,1
9,Azerbaijan,13


### Calculate uninstalls

In [4]:
cursor = conn.cursor()
query = '''
    SELECT country,
           uninstall
    FROM (  
        SELECT geo ->> 'country' AS country,
               COUNT(*) AS uninstall
        FROM events
        WHERE event_name='app_remove'
          AND geo ->> 'country' <> ''
        GROUP BY country
    ) uninstall
    ORDER BY country
'''
cursor.execute(query)
uninstall = pd.read_sql(query, con=conn)
cursor.close()
uninstall.rename(columns={ 'country':'Country', 'uninstall':'Uninstall' }, inplace=True)
uninstall.head(20)

Unnamed: 0,Country,Uninstall
0,Afghanistan,18
1,Albania,11
2,Algeria,248
3,Angola,11
4,Anguilla,3
5,Argentina,19
6,Armenia,5
7,Australia,1
8,Azerbaijan,9
9,Bahamas,1


### Calculate usage

In [5]:
cursor = conn.cursor()
query = '''
    SELECT country,
           AVG(days_used) AS usage
    FROM (
      SELECT user_pseudo_id,
             country,
             days_used
      FROM (
        SELECT user_pseudo_id,
               COUNT(*) AS days_used
        FROM (
          SELECT event_date,
                 user_pseudo_id
          FROM events
          GROUP BY event_date, user_pseudo_id
        ) AS users_by_day
        GROUP BY user_pseudo_id
      ) AS days_used
      INNER JOIN (
        SELECT user_pseudo_id AS user_pseudo_idc,
               geo ->> 'country' AS country
        FROM events
        WHERE geo ->> 'country' <> ''
        GROUP BY user_pseudo_idc, country
      ) AS user_country
      ON days_used.user_pseudo_id = user_country.user_pseudo_idc
    ) AS days_used_report
    GROUP BY country
    ORDER BY country
'''
cursor.execute(query)
usage = pd.read_sql(query, con=conn)
usage.rename(columns={ 'country':'Country', 'usage':'Usage' }, inplace=True)
usage['Usage'] = usage['Usage'].apply(lambda value: '%.2f' % value)
usage.head(20)

Unnamed: 0,Country,Usage
0,Afghanistan,1.38
1,Albania,1.18
2,Algeria,1.4
3,Angola,1.32
4,Anguilla,1.33
5,Argentina,1.46
6,Armenia,2.17
7,Australia,5.0
8,Austria,1.0
9,Azerbaijan,1.69


### Merge install, uninstall and average to result table

In [6]:
result = pd.merge(install, uninstall, on='Country')
result['Net new install'] = result['Install'] - result['Uninstall']
result = pd.merge(result, usage, on='Country')
result.head(20)

Unnamed: 0,Country,Install,Uninstall,Net new install,Usage
0,Afghanistan,32,18,14,1.38
1,Albania,17,11,6,1.18
2,Algeria,370,248,122,1.4
3,Angola,28,11,17,1.32
4,Anguilla,3,3,0,1.33
5,Argentina,26,19,7,1.46
6,Armenia,6,5,1,2.17
7,Australia,3,1,2,5.0
8,Azerbaijan,13,9,4,1.69
9,Bahamas,3,1,2,1.33


### Output HTTP Header

In [7]:
print('Content-type: text/csv')
print('Content-Disposition: attachment; filename="countries.csv"')
print()

Content-type: text/csv
Content-Disposition: attachment; filename="countries.csv"



### Output variables

In [8]:
print('# Title: Countries report')

# Title: Countries report


### Ouput result

In [9]:
str = result.to_csv(index=False)
print(str.encode('ascii','xmlcharrefreplace').decode('utf-8'))

Country,Install,Uninstall,Net new install,Usage
Afghanistan,32,18,14,1.38
Albania,17,11,6,1.18
Algeria,370,248,122,1.40
Angola,28,11,17,1.32
Anguilla,3,3,0,1.33
Argentina,26,19,7,1.46
Armenia,6,5,1,2.17
Australia,3,1,2,5.00
Azerbaijan,13,9,4,1.69
Bahamas,3,1,2,1.33
Bahrain,4,1,3,1.25
Bangladesh,352,273,79,1.30
Belarus,14,6,8,1.50
Belgium,4,3,1,1.00
Belize,4,3,1,1.25
Benin,23,9,14,1.35
Bhutan,2,2,0,1.00
Bolivia,52,35,17,1.35
Bosnia & Herzegovina,5,2,3,1.00
Botswana,4,2,2,1.25
Brazil,17,9,8,1.35
Brunei,4,3,1,1.75
Bulgaria,6,2,4,1.17
Burkina Faso,38,13,25,1.66
Burundi,17,5,12,1.35
Cambodia,27,14,13,1.48
Cameroon,28,13,15,1.43
Canada,4,3,1,1.00
Cape Verde,3,2,1,1.00
Central African Republic,3,1,2,2.33
Chad,9,2,7,1.67
Chile,6,4,2,1.50
China,3,1,2,1.00
Colombia,38,20,18,1.18
Congo - Brazzaville,15,6,9,1.60
Congo - Kinshasa,30,12,18,1.33
Costa Rica,9,5,4,1.11
C&#244;te d&#8217;Ivoire,45,22,23,1.51
Croatia,1,1,0,1.00
Cuba,115,52,63,1.49
Cyprus,4,3,1,1.50
Czechia,3,3,0,2.00
Dominican Republic,5

### Release resources

In [10]:
conn.close()