# Cohort 4 Team Selection Guidance

This notebook presents the results of the Capstone Project Interest Survey and our suggested team groupings.

In [33]:
import os
import csv
import random

from itertools import groupby
from operator import itemgetter
from collections import Counter, defaultdict

In [3]:
FIXTURES    = os.path.join(os.getcwd(), 'fixtures')
PREFERENCES = os.path.join(FIXTURES, 'cohort4-preferences.csv')

In [5]:
def read_data(prefs=PREFERENCES):
    with open(prefs, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            yield row

In [6]:
# Extract team groups from CSV
teams = dict([
    (int(team), list(group)) for team, group 
    in groupby(read_data(), key=itemgetter('Team'))
])

# Print team member counts
for team, prefs in teams.items():
    print "{}: {} members".format(team, len(prefs))

1: 4 members
2: 5 members
3: 5 members
4: 4 members
5: 5 members
6: 4 members
7: 5 members


In [18]:
def extract_csv_field(teams, field):
    counts = defaultdict(Counter)
 
    # Loop over all teams
    for team, prefs in teams.items():
    
        # Loop over all member preferences
        for pref in prefs:
            
            # Skip no preference
            if field not in pref: continue
            
            # Count all comma separated prefs
            for val in pref[field].split(','):
                val = val.strip()
                counts[team][val] += 1
    
    return counts


def print_multi_prefs(counts, title="Preference"):
    title = "Team {} Counts".format(title)
    print title
    print "-"*len(title)
    
    for team, prefs in counts.items():
        print "Team {}:".format(team)
        for item in prefs.most_common():
            if not item[0]: continue
            print "    {}: {}".format(*item)
        print
    print

In [32]:
domains  = extract_csv_field(teams, 'Domains')
projects = extract_csv_field(teams, 'Project Types')

# print_multi_prefs(domains, title="Domain Preference")
# print_multi_prefs(projects, title="Project Type Preference")

## Team Selection Reports

In [52]:
def select_coordinator(teamno, teams=teams):
    """
    From the people who selected yes to being coordinator, choose random.
    """
    # Filter out people who didn't say yes to coordinator role.
    coords = filter(
        lambda p: p['Coordinator'].lower() == 'yes', 
        teams[teamno]
    )
    
    return random.choice(coords)['Name']


def mean_level(teamno, field, teams=teams):
    """
    Compute the mean level from the survey.
    """
    levels = [
        float(pref[field]) if pref[field] else 0.0
        for pref in teams[teamno]
    ]
    
    return sum(levels) / len(levels)
    

def print_report(teamno, teams=teams, domains=domains, projects=projects):
    # Create output structure
    output = []
    
    # Create Title Header
    title = "Team {} Selection Report".format(teamno)
    output.append(title)
    output.append("-"*len(title))
    output.append("")
    
    # Print out averages
    output.append(
        "  * Coordinator: {}".format(select_coordinator(teamno))
    )
    output.append("")
    output.append(
        "  * Mean Python Level: {}".format(
            mean_level(teamno, 'Python')
        )
    )
    output.append(
        "  * Mean R Level: {}".format(
            mean_level(teamno, 'R')
        )
    )
    output.append(
        "  * Mean CLI Level: {}".format(
            mean_level(teamno, 'Terminal')
        )
    )
    output.append(
        "  * Mean Project Level: {}".format(
            mean_level(teamno, 'Project Level')
        )
    )
    output.append("")
    
    # Print out member names
    output.append("  - Members:")
    output.extend([
        "    + {}".format(pref['Name']) 
        for pref in teams[teamno]
    ])
    output.append("")
    
    # Print out domain preferences
    output.append("  - Domains:")
    output.extend([
        "    + {}: {}".format(*prefs) 
        for prefs in domains[teamno].most_common()
    ])
    output.append("")
    
    # Print out project preferences
    output.append("  - Project Types:")
    output.extend([
        "    + {}: {}".format(*prefs) 
        for prefs in projects[teamno].most_common()
    ])
    output.append("")
    
    # Return report string
    return "\n".join(output)

In [44]:
print print_report(1)

Team 1 Selection Report
-----------------------

  * Coordinator: Madeline Tomchick

  * Mean Python Level: 1.75
  * Mean R Level: 3.25
  * Mean CLI Level: 2.75
  * Mean Project Level: 2.5

  - Members:
    + Madeline Tomchick
    + Andy Pollack
    + Mcihal
    + Nicholas Colucci

  - Domains:
    + Security: 4
    + Government/Social Data: 3
    + Retail/Industry: 2
    + Finance/Banking: 2
    + Health Care/Medicine: 1
    + Education: 1
    + Sports: 1

  - Project Types:
    + Statistical Modeling for Forecasting: 4
    + Text Analysis/Natural Language Processing: 4
    + Network Analysis: 3
    + Visualization/Visual Analytics: 3
    + Time Series Analysis: 2
    + Regression Analysis: 2
    + Clustering or Classification: 1
    + Rule/Association Mining: 1



In [45]:
print print_report(2)

Team 2 Selection Report
-----------------------

  * Coordinator: Jessi

  * Mean Python Level: 2.2
  * Mean R Level: 0.8
  * Mean CLI Level: 4.4
  * Mean Project Level: 3.0

  - Members:
    + Jessi
    + Fay Tang
    + Bret Gaulin
    + Amit Arora
    + Jose R Arocho

  - Domains:
    + Finance/Banking: 4
    + Health Care/Medicine: 2
    + Communication Networks (i work specifically in the area of satellite networks): 1
    + Government/Social Data: 1

  - Project Types:
    + Visualization/Visual Analytics: 5
    + Clustering or Classification: 4
    + Statistical Modeling for Forecasting: 4
    + Rule/Association Mining: 4
    + Network Analysis: 3
    + Time Series Analysis: 3
    + Regression Analysis: 2
    + Text Analysis/Natural Language Processing: 2



In [46]:
print print_report(3)

Team 3 Selection Report
-----------------------

  * Coordinator: Mai Huynh

  * Mean Python Level: 2.0
  * Mean R Level: 1.8
  * Mean CLI Level: 3.2
  * Mean Project Level: 1.8

  - Members:
    + Mark Kindig
    + Mai Huynh
    + Ed Gilliland
    + Lisa Federer
    + Jan Spoor

  - Domains:
    + Government/Social Data: 4
    + Health Care/Medicine: 3
    + Education: 2
    + Transportation: 1
    + Retail/Industry: 1
    + Energy: 1
    + Real Estate: 1
    + Finance/Banking: 1
    + history: 1

  - Project Types:
    + Time Series Analysis: 4
    + Visualization/Visual Analytics: 3
    + Statistical Modeling for Forecasting: 3
    + Network Analysis: 2
    + Clustering or Classification: 2
    + : 1
    + Regression Analysis: 1
    + Text Analysis/Natural Language Processing: 1
    + Rule/Association Mining: 1



In [47]:
print print_report(4)

Team 4 Selection Report
-----------------------

  * Coordinator: Jennifer Lubkin Chavez

  * Mean Python Level: 1.75
  * Mean R Level: 1.0
  * Mean CLI Level: 1.75
  * Mean Project Level: 3.25

  - Members:
    + Jennifer Lubkin Chavez
    + Joel Haas
    + Michael Brown
    + Carmen Smiley

  - Domains:
    + Government/Social Data: 4
    + Health Care/Medicine: 3
    + Education: 2
    + Anything social science-y: 1
    + Transportation: 1
    + Finance/Banking: 1

  - Project Types:
    + Visualization/Visual Analytics: 4
    + Text Analysis/Natural Language Processing: 3
    + Network Analysis: 2
    + Regression Analysis: 2
    + Clustering or Classification: 2
    + Time Series Analysis: 1
    + Statistical Modeling for Forecasting: 1
    + Rule/Association Mining: 1



In [48]:
print print_report(5)

Team 5 Selection Report
-----------------------

  * Coordinator: Monique Bates

  * Mean Python Level: 4.6
  * Mean R Level: 3.8
  * Mean CLI Level: 4.2
  * Mean Project Level: 3.4

  - Members:
    + Monique Bates
    + Ben Kean
    + Max Almodovar
    + Brandon Harris
    + Jenny Wang

  - Domains:
    + Transportation: 4
    + Sports: 4
    + Energy: 3
    + Finance/Banking: 3
    + Government/Social Data: 3
    + Anything interfacing with a computer: 1
    + Retail/Industry: 1
    + Security: 1
    + Technology: 1
    + Health Care/Medicine: 1
    + Mobile Apps: 1
    + Education: 1
    + Agriculture: 1

  - Project Types:
    + Visualization/Visual Analytics: 4
    + Statistical Modeling for Forecasting: 4
    + Time Series Analysis: 3
    + Network Analysis: 2
    + Regression Analysis: 2
    + Clustering or Classification: 2
    + Text Analysis/Natural Language Processing: 2
    + Rule/Association Mining: 2
    + Preference ranking systems: 1
    + Recommendation systems: 1
   

In [49]:
print print_report(6)

Team 6 Selection Report
-----------------------

  * Coordinator: Jawad Syedain

  * Mean Python Level: 1.75
  * Mean R Level: 0.75
  * Mean CLI Level: 1.75
  * Mean Project Level: 2.0

  - Members:
    + Jawad Syedain
    + Kyle Rossetti
    + Melissa Carson
    + Andrea Wise

  - Domains:
    + Government/Social Data: 4
    + Sports: 2
    + Energy: 1
    + Security: 1
    + Education: 1

  - Project Types:
    + Statistical Modeling for Forecasting: 3
    + Visualization/Visual Analytics: 2
    + Regression Analysis: 1
    + Text Analysis/Natural Language Processing: 1
    + : 1



In [50]:
print print_report(7)

Team 7 Selection Report
-----------------------

  * Coordinator: Jonathan Creem

  * Mean Python Level: 3.4
  * Mean R Level: 3.8
  * Mean CLI Level: 4.8
  * Mean Project Level: 3.2

  - Members:
    + Andrew Miller
    + Jonathan Creem
    + Michael Ebanks
    + Caroline Morris
    + Max Almodovar

  - Domains:
    + Government/Social Data: 4
    + Finance/Banking: 4
    + Security: 2
    + Transportation: 1
    + Retail/Industry: 1
    + Labor/Wages: 1
    + Health Care/Medicine: 1
    + Sports: 1

  - Project Types:
    + Regression Analysis: 4
    + Clustering or Classification: 4
    + Statistical Modeling for Forecasting: 4
    + Time Series Analysis: 3
    + Visualization/Visual Analytics: 2
    + Text Analysis/Natural Language Processing: 2
    + Network Analysis: 1
    + Rule/Association Mining: 1

