# User Report Notebook

This notebook will be used to test the generation of graphs and other visualizations that will be used in future user reports. 

NOTE: The format of the code is somewhat different from what will be run through the Python command prompt. This is done solely to fit the restrictions imposed by Jupyter notebooks (such as reading and writing files).


1) Copy class cell directly over

2) Copy new function cells directly over

In [260]:
import csv
import os
import random
import shutil
from collections import Counter
from os import listdir
from os.path import isfile, join

import numpy as np
import pandas as pd

In [261]:
class Directory:
    """Container for the users, groups and distinct category values of one export.

    A new Directory starts empty; the loading code fills ``users``, ``groups``
    and the lists inside ``categories``.
    """

    def __init__(self):
        self.users = []   # User objects
        self.groups = []  # Group objects, created the first time a member is added

        # Display name for each known numeric group id.
        # NOTE(review): "SoCo Entrpreneurs" looks like a typo for "Entrepreneurs";
        # left untouched in case it mirrors the name used on the platform.
        self.group_names = {2315:"Help Center",
                            2720:"Grand Valley Entrepreneurs",
                            2721:"Food and Agriculture Industries",
                            2910:"Central Mountain Entrepreneurs",
                            2986:"ExSW: Entrepreneurs of the Southwest",
                            2987:"SoCo Entrpreneurs",
                            3094:"Group Moderators",
                            3132:"Northeast Small Biz and Entrepreneurs",
                            3196:"Northwest Colorado Entrepreneurs",
                            3492:"Roaring Fork Valley Startups",
                            3507:"San Luis Valley Entrepreneurs",
                            3550:"Outdoor Industry Startups",
                            3671:"Rural Entrepreneurial Policy Coalition"}

        # Distinct values seen for each profile category. 'stages' is included
        # so the directory tracks the same categories that User.categories holds.
        self.categories = {
            'groups': [],
            'expertise': [],
            'industry': [],
            'interests': [],
            'resources': [],
            'stages': [],
        }


class User:
    """One exported user profile.

    Attributes:
        uid: user id.
        first_name, last_name, email: contact details as exported.
        last_active: date the profile was last active.
        created: date the profile was created.
        count: number of times the user signed in.
        score: numeric score; the notebook ranks users by it.
        categories: dict holding the user's 'groups', 'expertise', 'industry',
            'interests', 'resources' and 'stages' lists (stored as given, not copied).
        location: location label for the user.
    """

    def __init__(self, uid, first_name, last_name, email, last_active, created, count, score, groups, expertise, industry, interests, resources, location, stages):
        # Identity and activity fields.
        self.uid = uid
        self.first_name = first_name
        self.last_name = last_name
        self.email = email
        self.last_active = last_active
        self.created = created
        self.count = count
        self.score = score

        # Multi-valued profile fields, grouped under one dict keyed by category name.
        self.categories = {
            'groups': groups,
            'expertise': expertise,
            'industry': industry,
            'interests': interests,
            'resources': resources,
            'stages': stages,
        }

        self.location = location
        

class Group:
    """A group identified by ``gid`` with a display ``name`` and a member list."""

    def __init__(self, gid, name):
        self.gid, self.name = gid, name
        # Starts empty; members are appended after construction.
        self.members = []
        

In [268]:
def get_group(directory, gid):
    """Return the first group in ``directory.groups`` whose ``gid`` equals *gid*.

    Returns None when no group matches.
    """
    matches = (candidate for candidate in directory.groups if candidate.gid == gid)
    return next(matches, None)
        
def add_group_member(directory, user, gid):
    """Append *user* to the group with id *gid*, creating the group if needed.

    Args:
        directory: object exposing ``groups`` (list of groups) and
            ``group_names`` (dict of gid -> display name).
        user: the member to add.
        gid: group id; converted with ``int()``, so numeric strings are accepted.

    Raises:
        ValueError: if *gid* cannot be converted to an int.
    """
    gid = int(gid)
    group = get_group(directory, gid)
    if group is None:
        # The name table is hard-coded, so an id it does not list would
        # otherwise abort the whole import with a KeyError. Use a placeholder
        # name instead; it stays visible in any output that shows group names.
        name = directory.group_names.get(gid, f"Group {gid}")
        group = Group(gid, name)
        directory.groups.append(group)
    group.members.append(user)
    
def _ranked_counts(values):
    """Return {value: occurrences} ordered from most to least frequent.

    Values with equal counts keep the order in which they were first seen.
    """
    return dict(Counter(values).most_common())


def create_group_dicts(directory):
    """Tally, per group, how often each location and category value occurs among its members.

    The tallies are the raw material for per-group composition charts
    (locations, industries, etc.).

    NOTE: Stages (the stage a user is in within their career) is NOT an accurate report.
    Not all users, only a select few in fact, have filled this information out.
    In fact, a single user may account for 4 or 5 different reported stages; keep this in mind.

    Args:
        directory: object whose ``groups`` each expose ``gid`` and ``members``;
            every member exposes ``location`` (a string) and ``categories``
            (dict with 'industry', 'expertise', 'resources' and 'stages' lists).

    Returns:
        dict mapping each group's gid to a dict with the keys "locations",
        "industries", "expertises", "resources" and "stages"; each of those is
        a {value: count} dict ordered by descending count.
    """
    # Output key -> key in user.categories that feeds it.
    category_sources = (
        ("industries", "industry"),
        ("expertises", "expertise"),
        ("resources", "resources"),
        ("stages", "stages"),
    )

    group_dict = {}
    for group in directory.groups:
        members = group.members

        # Only the text before the first comma of a location is counted.
        # It is stripped so that values differing only in surrounding
        # whitespace (" Denver" vs "Denver") land in the same tally.
        summary = {
            "locations": _ranked_counts(
                user.location.split(",")[0].strip() for user in members
            ),
        }
        for output_key, category in category_sources:
            summary[output_key] = _ranked_counts(
                value for user in members for value in user.categories[category]
            )

        group_dict[group.gid] = summary

    return group_dict

In [269]:
def fill_directory(directory, category, data):
    """Add each value of *data* to ``directory.categories[category]`` unless already listed.

    Values are appended in the order they appear in *data*; the list is
    modified in place and nothing is returned.
    """
    for entry in data:
        known = directory.categories[category]
        if entry in known:
            continue
        known.append(entry)


def fix_list(data):
    """Return the items of *data* with whitespace trimmed and empty strings dropped.

    String items have all leading and trailing whitespace removed, so the
    pieces of a comma-separated cell such as "a, b ,  c" come out as
    ["a", "b", "c"]. Items that are empty after trimming are discarded.
    Non-string items are passed through unchanged.

    Args:
        data: iterable of items, typically the result of ``str.split(",")``.

    Returns:
        A new list; *data* itself is not modified.
    """
    result = []

    for item in data:
        if isinstance(item, str):
            item = item.strip()
        if item != '':
            result.append(item)

    return result


def analysis(directory):
    """Print ``last_name,first_name: score`` for the first user in ``directory.users``.

    Only the first entry is reported; nothing is printed when there are no users.
    """
    try:
        leader = next(iter(directory.users))
    except StopIteration:
        return
    print("".join([leader.last_name, ",", leader.first_name, ": ", str(leader.score)]))

# (category name, spreadsheet column position) for every comma-separated
# multi-value cell that is parsed for each user.
_CATEGORY_COLUMNS = (
    ('groups', 117),
    ('expertise', 124),
    ('industry', 128),
    ('interests', 130),
    ('resources', 123),
    ('stages', 87),
)


def _resolve_location(city, full_address):
    """Choose a location label from a row's city and full-address cells.

    The city is used when it is filled in. Otherwise one comma-separated
    piece of the full address is used: the second piece when the address has
    more than three pieces, else the first (this may be a city or a country,
    depending on how the address was entered). Surrounding whitespace is
    stripped so that a value cut out of an address (" Denver") matches the
    same value typed into the city field ("Denver").

    Returns "NO RECORDED LOCATION" when neither cell yields any text, which
    is the case for users who have not finished joining the network.
    """
    city = city.strip()
    if city:
        return city
    pieces = full_address.split(",")
    candidate = pieces[1] if len(pieces) > 3 else pieces[0]
    return candidate.strip() or "NO RECORDED LOCATION"


def read_users(directory, path="User_export_2020-12-21.xlsx"):
    """Load every user from the exported spreadsheet and index them in *directory*.

    For each spreadsheet row this builds a User, records the row's category
    values in ``directory.categories`` and adds the user to each of its
    groups through ``add_group_member``.

    Columns are addressed by position, so the indices used here are tied to
    the layout of the export this was written against.

    Args:
        directory: Directory to populate. Its ``users`` attribute is not
            assigned here; the caller stores the returned list.
        path: spreadsheet to read. Defaults to the export file this notebook
            was developed against.

    Returns:
        list of User objects in spreadsheet row order.
    """
    # Blank cells become "" so the .split() calls below work on every row.
    frame = pd.read_excel(path).fillna("")

    users = []
    for row in frame.to_numpy():
        uid = row[0]
        first_name = row[7]
        last_name = row[4]
        email = row[10]
        # Keep only the text before the first space
        # (presumably the date part of a "date time" value; verify against the export).
        last_active = row[41].split(' ')[0]
        created = row[44].split(' ')[0]
        count = row[40]

        score = int(row[119]) if row[119] != '' else 0

        parsed = {}
        for category, column in _CATEGORY_COLUMNS:
            parsed[category] = fix_list(row[column].split(","))
            # Make sure the bucket exists before filling it: a Directory is
            # not guaranteed to pre-register every category (e.g. 'stages').
            directory.categories.setdefault(category, [])
            fill_directory(directory, category, parsed[category])

        # row[79] is the city cell, row[77] the full address.
        location = _resolve_location(row[79], row[77])

        user = User(uid, first_name, last_name, email, last_active, created, count, score,
                    parsed['groups'], parsed['expertise'], parsed['industry'],
                    parsed['interests'], parsed['resources'], location, parsed['stages'])
        users.append(user)

        for gid in parsed['groups']:
            add_group_member(directory, user, gid)

    return users

In [270]:
# Build the directory, load every user into it, and keep the users ranked by
# score with the highest first.
directory = Directory()
directory.users = sorted(read_users(directory), key=lambda member: member.score, reverse=True)

In [271]:
# Print "last_name,first_name: score" for the first user in directory.users
# (the highest scorer, since the list was sorted by score descending).
analysis(directory)

Grande,Taylor: 14745


In [272]:
# Per-group tallies of member locations, industries, expertises, resources
# and stages, keyed by group id.
group_dicts = create_group_dicts(directory)