In [None]:
# Notebook configuration: MongoDB connection settings.
# CONNECTION_STRING is the URI handed to MongoClient.
CONNECTION_STRING = "mongodb://192.168.56.102:27017"
# DATABASE_NAME selects the database on that client.
DATABASE_NAME = "nus"
# COLLECTION_NAME selects the collection the map-reduce jobs run against.
COLLECTION_NAME = "openfood"

In [None]:
from pymongo import MongoClient
from bson.code import Code
import pymongo, operator
import plotly as py
import plotly.graph_objs as go
# Initialise plotly's offline notebook mode; the chart further down is drawn
# with py.offline.iplot.
py.offline.init_notebook_mode()

# Open the MongoDB client and keep handles to the configured database (`db`)
# and collection (`openfood`); the map-reduce jobs below run on `openfood`.
client = MongoClient(CONNECTION_STRING)
db = client[DATABASE_NAME]
openfood = db[COLLECTION_NAME]

In [None]:
# Map step (JavaScript, run by MongoDB for every document).
#
# Splits the document's comma-separated `countries` string and emits one
# (normalised lowercase country name, 1) pair per non-empty entry.
# Normalisation: strip surrounding punctuation/whitespace, lowercase, drop an
# "en:" prefix, map known foreign/prefixed spellings to an English name, and
# expand two-letter ISO codes to their full name.
# Documents whose `countries` field is missing, empty or not a string emit
# ("None", 1).
#
# The literal is a raw string so the regex escapes `\[` / `\]` reach the
# JavaScript engine unchanged without relying on Python keeping unknown
# escape sequences.
mapper = Code(r"""
    function () {
        // Only strings can be split; anything else (missing, null, ...) is counted as "None".
        if (typeof this.countries === 'string' && this.countries !== "") {
            // ISO 3166 alpha-2 code -> country name. Built once per document, not once per token.
            var isoCountries = {
                'AF' : 'Afghanistan',
                'AX' : 'Aland Islands',
                'AL' : 'Albania',
                'DZ' : 'Algeria',
                'AS' : 'American Samoa',
                'AD' : 'Andorra',
                'AO' : 'Angola',
                'AI' : 'Anguilla',
                'AQ' : 'Antarctica',
                'AG' : 'Antigua And Barbuda',
                'AR' : 'Argentina',
                'AM' : 'Armenia',
                'AW' : 'Aruba',
                'AU' : 'Australia',
                'AT' : 'Austria',
                'AZ' : 'Azerbaijan',
                'BS' : 'Bahamas',
                'BH' : 'Bahrain',
                'BD' : 'Bangladesh',
                'BB' : 'Barbados',
                'BY' : 'Belarus',
                'BE' : 'Belgium',
                'BZ' : 'Belize',
                'BJ' : 'Benin',
                'BM' : 'Bermuda',
                'BT' : 'Bhutan',
                'BO' : 'Bolivia',
                'BA' : 'Bosnia And Herzegovina',
                'BW' : 'Botswana',
                'BV' : 'Bouvet Island',
                'BR' : 'Brazil',
                'IO' : 'British Indian Ocean Territory',
                'BN' : 'Brunei Darussalam',
                'BG' : 'Bulgaria',
                'BF' : 'Burkina Faso',
                'BI' : 'Burundi',
                'KH' : 'Cambodia',
                'CM' : 'Cameroon',
                'CA' : 'Canada',
                'CV' : 'Cape Verde',
                'KY' : 'Cayman Islands',
                'CF' : 'Central African Republic',
                'TD' : 'Chad',
                'CL' : 'Chile',
                'CN' : 'China',
                'CX' : 'Christmas Island',
                'CC' : 'Cocos (Keeling) Islands',
                'CO' : 'Colombia',
                'KM' : 'Comoros',
                'CG' : 'Congo',
                'CD' : 'Congo, Democratic Republic',
                'CK' : 'Cook Islands',
                'CR' : 'Costa Rica',
                'CI' : 'Cote DIvoire',
                'HR' : 'Croatia',
                'CU' : 'Cuba',
                'CY' : 'Cyprus',
                'CZ' : 'Czech Republic',
                'DK' : 'Denmark',
                'DJ' : 'Djibouti',
                'DM' : 'Dominica',
                'DO' : 'Dominican Republic',
                'EC' : 'Ecuador',
                'EG' : 'Egypt',
                'SV' : 'El Salvador',
                'GQ' : 'Equatorial Guinea',
                'ER' : 'Eritrea',
                'EE' : 'Estonia',
                'ET' : 'Ethiopia',
                'FK' : 'Falkland Islands (Malvinas)',
                'FO' : 'Faroe Islands',
                'FJ' : 'Fiji',
                'FI' : 'Finland',
                'FR' : 'France',
                'GF' : 'French Guiana',
                'PF' : 'French Polynesia',
                'TF' : 'French Southern Territories',
                'GA' : 'Gabon',
                'GM' : 'Gambia',
                'GE' : 'Georgia',
                'DE' : 'Germany',
                'GH' : 'Ghana',
                'GI' : 'Gibraltar',
                'GR' : 'Greece',
                'GL' : 'Greenland',
                'GD' : 'Grenada',
                'GP' : 'Guadeloupe',
                'GU' : 'Guam',
                'GT' : 'Guatemala',
                'GG' : 'Guernsey',
                'GN' : 'Guinea',
                'GW' : 'Guinea-Bissau',
                'GY' : 'Guyana',
                'HT' : 'Haiti',
                'HM' : 'Heard Island & Mcdonald Islands',
                'VA' : 'Holy See (Vatican City State)',
                'HN' : 'Honduras',
                'HK' : 'Hong Kong',
                'HU' : 'Hungary',
                'IS' : 'Iceland',
                'IN' : 'India',
                'ID' : 'Indonesia',
                'IR' : 'Iran',
                'IQ' : 'Iraq',
                'IE' : 'Ireland',
                'IM' : 'Isle Of Man',
                'IL' : 'Israel',
                'IT' : 'Italy',
                'JM' : 'Jamaica',
                'JP' : 'Japan',
                'JE' : 'Jersey',
                'JO' : 'Jordan',
                'KZ' : 'Kazakhstan',
                'KE' : 'Kenya',
                'KI' : 'Kiribati',
                'KR' : 'Korea',
                'KW' : 'Kuwait',
                'KG' : 'Kyrgyzstan',
                'LA' : 'Lao Peoples Democratic Republic',
                'LV' : 'Latvia',
                'LB' : 'Lebanon',
                'LS' : 'Lesotho',
                'LR' : 'Liberia',
                'LY' : 'Libyan Arab Jamahiriya',
                'LI' : 'Liechtenstein',
                'LT' : 'Lithuania',
                'LU' : 'Luxembourg',
                'MO' : 'Macao',
                'MK' : 'Macedonia',
                'MG' : 'Madagascar',
                'MW' : 'Malawi',
                'MY' : 'Malaysia',
                'MV' : 'Maldives',
                'ML' : 'Mali',
                'MT' : 'Malta',
                'MH' : 'Marshall Islands',
                'MQ' : 'Martinique',
                'MR' : 'Mauritania',
                'MU' : 'Mauritius',
                'YT' : 'Mayotte',
                'MX' : 'Mexico',
                'FM' : 'Micronesia, Federated States Of',
                'MD' : 'Moldova',
                'MC' : 'Monaco',
                'MN' : 'Mongolia',
                'ME' : 'Montenegro',
                'MS' : 'Montserrat',
                'MA' : 'Morocco',
                'MZ' : 'Mozambique',
                'MM' : 'Myanmar',
                'NA' : 'Namibia',
                'NR' : 'Nauru',
                'NP' : 'Nepal',
                'NL' : 'Netherlands',
                'AN' : 'Netherlands Antilles',
                'NC' : 'New Caledonia',
                'NZ' : 'New Zealand',
                'NI' : 'Nicaragua',
                'NE' : 'Niger',
                'NG' : 'Nigeria',
                'NU' : 'Niue',
                'NF' : 'Norfolk Island',
                'MP' : 'Northern Mariana Islands',
                'NO' : 'Norway',
                'OM' : 'Oman',
                'PK' : 'Pakistan',
                'PW' : 'Palau',
                'PS' : 'Palestinian Territory, Occupied',
                'PA' : 'Panama',
                'PG' : 'Papua New Guinea',
                'PY' : 'Paraguay',
                'PE' : 'Peru',
                'PH' : 'Philippines',
                'PN' : 'Pitcairn',
                'PL' : 'Poland',
                'PT' : 'Portugal',
                'PR' : 'Puerto Rico',
                'QA' : 'Qatar',
                'RE' : 'Reunion',
                'RO' : 'Romania',
                'RU' : 'Russian Federation',
                'RW' : 'Rwanda',
                'BL' : 'Saint Barthelemy',
                'SH' : 'Saint Helena',
                'KN' : 'Saint Kitts And Nevis',
                'LC' : 'Saint Lucia',
                'MF' : 'Saint Martin',
                'PM' : 'Saint Pierre And Miquelon',
                'VC' : 'Saint Vincent And Grenadines',
                'WS' : 'Samoa',
                'SM' : 'San Marino',
                'ST' : 'Sao Tome And Principe',
                'SA' : 'Saudi Arabia',
                'SN' : 'Senegal',
                'RS' : 'Serbia',
                'SC' : 'Seychelles',
                'SL' : 'Sierra Leone',
                'SG' : 'Singapore',
                'SK' : 'Slovakia',
                'SI' : 'Slovenia',
                'SB' : 'Solomon Islands',
                'SO' : 'Somalia',
                'ZA' : 'South Africa',
                'GS' : 'South Georgia And Sandwich Isl.',
                'ES' : 'Spain',
                'LK' : 'Sri Lanka',
                'SD' : 'Sudan',
                'SR' : 'Suriname',
                'SJ' : 'Svalbard And Jan Mayen',
                'SZ' : 'Swaziland',
                'SE' : 'Sweden',
                'CH' : 'Switzerland',
                'SY' : 'Syrian Arab Republic',
                'TW' : 'Taiwan',
                'TJ' : 'Tajikistan',
                'TZ' : 'Tanzania',
                'TH' : 'Thailand',
                'TL' : 'Timor-Leste',
                'TG' : 'Togo',
                'TK' : 'Tokelau',
                'TO' : 'Tonga',
                'TT' : 'Trinidad And Tobago',
                'TN' : 'Tunisia',
                'TR' : 'Turkey',
                'TM' : 'Turkmenistan',
                'TC' : 'Turks And Caicos Islands',
                'TV' : 'Tuvalu',
                'UG' : 'Uganda',
                'UA' : 'Ukraine',
                'AE' : 'United Arab Emirates',
                'GB' : 'United Kingdom',
                'UK' : 'United Kingdom',
                'US' : 'United States',
                'UM' : 'United States Outlying Islands',
                'UY' : 'Uruguay',
                'UZ' : 'Uzbekistan',
                'VU' : 'Vanuatu',
                'VE' : 'Venezuela',
                'VN' : 'Viet Nam',
                'VG' : 'Virgin Islands, British',
                'VI' : 'Virgin Islands, U.S.',
                'WF' : 'Wallis And Futuna',
                'EH' : 'Western Sahara',
                'YE' : 'Yemen',
                'ZM' : 'Zambia',
                'ZW' : 'Zimbabwe'
            };

            // Known foreign-language / prefixed spellings -> lowercase English name.
            var aliases = {
                "de:deutschland" : "germany",
                "de:france" : "france",
                "de:weltweit" : "worldwide",
                "fr:pologne" : "poland",
                "fr:québec" : "canada",
                "quebec" : "canada",
                "québec" : "canada",
                "deutschland" : "germany",
                "other:日本" : "japan",
                "usa" : "united states",
                "suisse" : "switzerland",
                "españa" : "spain",
                "espana" : "spain",
                "espanha" : "spain",
                "espanya" : "spain",
                "nederland" : "netherlands",
                "россия" : "russia"
            };

            var tokens = this.countries.split(",");
            // `var` keeps the loop index local instead of creating an implicit global.
            for (var i = 0; i < tokens.length; i++) {
                // Strip punctuation and spaces from both ends, then lowercase.
                // Note: " -." inside the character class is a range (space through '.'),
                // so characters such as * and + are stripped as well.
                var country = tokens[i].replace(/^[ -.,()&$#!/\[\]{}"']*/, "").replace(/[ -.,()&$#!/\[\]{}"']*$/, "").toLowerCase();

                // Drop the English language prefix, e.g. "en:france" -> "france".
                if (country.indexOf("en:") === 0) {
                    country = country.substring(3);
                }

                // Trailing commas or punctuation-only entries leave nothing to count.
                if (country === "") {
                    continue;
                }

                if (aliases.hasOwnProperty(country)) {
                    country = aliases[country];
                }

                // Expand two-letter ISO codes ("fr" -> "France").
                var code = country.toUpperCase();
                if (isoCountries.hasOwnProperty(code)) {
                    country = isoCountries[code];
                }

                emit(country.toLowerCase(), 1);
            }

        } else {
            emit("None", 1);
        }

    }""")
# Reduce step (JavaScript): add up all the 1s emitted for a given country key.
reducer = Code("""
    function (key, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i];
        }
        return total;
    }""")

# Run the job inline (results are returned to the client rather than stored in
# a collection) and index the per-country totals by country name.
countries = {}
countries_stats = openfood.inline_map_reduce(mapper, reducer)
countries.update((row['_id'], row['value']) for row in countries_stats)

In [None]:
# Rank countries by product count, largest first.
countries_sorted = sorted(countries.items(), key=operator.itemgetter(1), reverse=True)

# Keep at most the 100 largest entries. Slicing (rather than indexing
# range(100)) avoids an IndexError when fewer than 100 countries were found.
top_countries = countries_sorted[:100]
x = list(map(operator.itemgetter(0), top_countries))  # country names
y = list(map(operator.itemgetter(1), top_countries))  # product counts

# Horizontal bars: counts run along the x axis, country names along the y axis,
# hence the swapped arguments.
countries_graph = [go.Bar(
    x=y, y=x,
    orientation='h'
)]

py.offline.iplot(countries_graph, filename='horizontal-bar')

Write the per-country stats to a CSV file.

In [None]:
import unicodecsv

# Write one "country,count" row per entry of `countries`.
# The file is opened in 'wb' (truncate) mode so that re-running this cell
# replaces the previous export instead of appending duplicate rows to it.
# Binary mode is what unicodecsv expects; it does the encoding itself.
with open('countries_stats.csv', 'wb') as f:
    writer = unicodecsv.writer(f)
    # Iterate the items directly: indexing `countries.items()[i]` rebuilt the
    # whole item list twice per row and does not work on Python 3 dict views.
    writer.writerows(countries.items())

Generate per-language statistics. Note that these counts do not accurately reflect the actual language of the data.

In [None]:
# Map step (JavaScript): emit (language code, 1) for every key of the
# document's `languages_codes` sub-document.
# The original `!== {}` test was always true in JavaScript (two distinct
# objects are never identical), so it is replaced by a real type check;
# null and empty objects simply yield no keys in the for-in loop.
mapper = Code("""
    function () {
        if (typeof this.languages_codes === 'object') {
            for (var key in this.languages_codes) {
                emit(key, 1);
            }
        }
    }""")
# Reduce step (JavaScript): add up the 1s emitted for each language code.
reducer = Code("""
    function (key, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i];
        }
        return total;
    }""")

# language: language code -> number of documents listing that code.
language = {}
language_stats = openfood.inline_map_reduce(mapper, reducer)
for each in language_stats:
    language[each['_id']] = each['value']

# Sum over all codes. A document that lists several languages contributes
# once per language, so this is a count of language tags, not of documents.
language_count = sum(language.values())

# Single pre-formatted argument: prints the same text on Python 2 and 3.
print("Total: %s" % language_count)

In [None]:
# Write one "language code,count" row per entry of `language`.
# The file is opened in 'wb' (truncate) mode so that re-running this cell
# replaces the previous export instead of appending duplicate rows to it.
# Binary mode is what unicodecsv expects; it does the encoding itself.
with open('languages_stats.csv', 'wb') as f:
    writer = unicodecsv.writer(f)
    # Iterate the items directly: indexing `language.items()[i]` rebuilt the
    # whole item list twice per row and does not work on Python 3 dict views.
    writer.writerows(language.items())

In [None]:
# Map step (JavaScript): for every document that has a non-empty
# `nutrient_levels` value, emit (raw `countries` string, 1).
# The original `!== {}` comparison was always true in JavaScript, so documents
# with an empty `nutrient_levels` object were counted too; the for-in probe
# below is false for undefined, null and empty objects.
mapper = Code("""
    function () {
        var hasLevels = false;
        for (var level in this.nutrient_levels) {
            hasLevels = true;
            break;
        }
        if (hasLevels) {
            emit(this.countries, 1);
        }
    }""")
# Reduce step (JavaScript): add up the 1s emitted for each `countries` value.
reducer = Code("""
    function (key, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i];
        }
        return total;
    }""")

# NOTE: `language_count` / `language_stats` are reused here although they now
# hold nutrient-level figures; the names are kept so that anything relying on
# them after this cell keeps working.
language_count = 0
language_stats = openfood.inline_map_reduce(mapper, reducer)
for each in language_stats:
    # Pre-formatted string: prints the same text on Python 2 and 3.
    print("%s %s" % (each['_id'], each['value']))
    # Accumulate the grand total; previously this was never incremented, so
    # the summary line below always reported 0.
    language_count += each['value']

print("Total: %s" % language_count)