In [8]:
# Imports
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [9]:
# Config matplotlib plot
%matplotlib inline
plt.rcParams['figure.figsize'] = (40, 20)
plt.rcParams['axes.titlesize'] = (30)
plt.rcParams['axes.titlepad'] = (10)
plt.rcParams['axes.labelsize'] = (20)

In [10]:
# Import data
filename = 'OMA-project-data.csv'
df = pd.read_csv(filename)
df = df.drop(["LINK", "TYPOLOGY_02", "@Photos", "Unnamed: 11", "IMG_", "URL STRING"], axis=1)

#add decade col
def create_decade(x):
    d = str(x['YEAR'])
    e = d[:3] + "0s"
    return e

df['DECADE'] = df.apply(create_decade, axis=1)

df.head(10)

Unnamed: 0,DESCRIPTION,YEAR,TYPE,STATUS,TYPOLOGY_01,REM?,TEAM_01,TEAM_02,TEAM_03,TEAM_04,...,Unnamed: 101,Unnamed: 102,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109,DECADE
0,Delirious New York,1978,Publication,Publication,,True,,,,,...,,,,,,,,,,1970s
1,Dutch Parliament Extension,1978,Project,Competition,Office,True,Zaha Hadid,Richard Perlemutter,Ron Steiner,Elias Veneris,...,,,,,,,,,,1970s
2,Irish Prime Minister's Residence,1979,Project,Competition,Office,True,Alan Forster,Stefano de Martino,Ron Steiner,Elia Zenghelis,...,,,,,,,,,,1970s
3,Boompjes,1980,Project,Commissioned Study,Residential,True,Kees Christiaanse,Gerard Comello,Stefano de Martino,Jeroen Thomas,...,,,,,,,,,,1980s
4,Kochstrasse / Friedrichstrasse Housing,1980,Project,Competition,Residential,True,Herman de Kovel,Stefano de Martino,Richard Perlemutter,Ricardo Simonini,...,,,,,,,,,,1980s
5,Koepel Panopticon Prison,1980,Project,Commissioned Study,Infrastructure,True,Mike Guyer,Thijs de Haan,Vahé Kalousdian,Brigitte Kochta,...,,,,,,,,,,1980s
6,Lützowstrasse Housing,1980,Project,Competition,Residential,False,Norman Chang,Omri Eytan,Katerina Galani,Andreas Kourkoulas,...,,,,,,,,,,1980s
7,Parc de la Villette,1982,Project,Competition,Landscape,True,Kees Christiaanse,Stefano de Martino,Ruurd Roorda,Ron Steiner,...,,,,,,,,,,1980s
8,Exposition Universelle,1983,Project,Design Development,Masterplan,True,Kees Christiaanse,Stefano de Martino,Willem-Jan Neutelings,Ron Steiner,...,,,,,,,,,,1980s
9,Churchillplein,1984,Project,Competition,Office,True,Kees Christiaanse,Jaap van Heest,Götz Keller,Jeroen Thomas,...,,,,,,,,,,1980s


In [11]:
status_dict = {
               "Commissioned Study": 1,
               "Competition": 2,
               "Completed": 4,
               "Construction": 2,
               "Demolished": 2,
               "Design Development": 2,
               "Lecture": 0,
               "Publication": 4,
               "Study": 0
               }

typology_dict = {
                 "N/A": 0,
                 "Arena": 0,
                 "Branding": 1,
                 "Education": 2,
                 "Exhibition": 2,
                 "Hotel": 2,
                 "Industrial": 0,
                 "Infrastructure": 0,
                 "Landscape": 2,
                 "Library": 3,
                 "Masterplan": 3,
                 "Mixed use": 4,
                 "Museum / Gallery": 3,
                 "Office": 0,
                 "Product Design": 0,
                 "Research": 0,
                 "Residential": 1,
                 "Restaurant / Bar": 0,
                 "Retail": 2,
                 "Scenography": 3,
                 "Theatre": 3,
                 "Tower": 0
                 }

decades_dict = {
                "1970s": 5,
                "1980s": 3,
                "1990s": 1,
                "2000s": 0
                }


dict_dict = {
            "STATUS": status_dict,
            "TYPOLOGY_01": typology_dict,
            "DECADE": decades_dict
            }

In [12]:
def set_value (dictionary, key):
    max_dict = max([i for i in dictionary.values()])
    x = dictionary[key] / max_dict
    return x

def importance_factor(y):
    z = []
    
    for k in dict_dict:
        try:
            m = dict_dict[k]
            n = y[k]
            z.append(set_value(m, n))
        except:
            pass
    
    a = float(len(z))
    b = float(sum(z))
    return (b/a)

In [13]:
df['IMPORTANCE'] = df.apply(importance_factor, axis=1)

df.sort_values("IMPORTANCE", ascending=False).head(1)

Unnamed: 0,DESCRIPTION,YEAR,TYPE,STATUS,TYPOLOGY_01,REM?,TEAM_01,TEAM_02,TEAM_03,TEAM_04,...,Unnamed: 102,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109,DECADE,IMPORTANCE
0,Delirious New York,1978,Publication,Publication,,True,,,,,...,,,,,,,,,1970s,1.0


In [14]:
# Organise columns
df["TEAM LIST"] = np.nan

cols = df.columns.tolist()
cols = cols[-3:] + cols[:-3]
df = df[cols]
df.head(5)

Unnamed: 0,DECADE,IMPORTANCE,TEAM LIST,DESCRIPTION,YEAR,TYPE,STATUS,TYPOLOGY_01,REM?,TEAM_01,...,Unnamed: 100,Unnamed: 101,Unnamed: 102,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109
0,1970s,1.0,,Delirious New York,1978,Publication,Publication,,True,,...,,,,,,,,,,
1,1970s,0.333333,,Dutch Parliament Extension,1978,Project,Competition,Office,True,Zaha Hadid,...,,,,,,,,,,
2,1970s,0.333333,,Irish Prime Minister's Residence,1979,Project,Competition,Office,True,Alan Forster,...,,,,,,,,,,
3,1980s,0.0,,Boompjes,1980,Project,Commissioned Study,Residential,True,Kees Christiaanse,...,,,,,,,,,,
4,1980s,0.0,,Kochstrasse / Friedrichstrasse Housing,1980,Project,Competition,Residential,True,Herman de Kovel,...,,,,,,,,,,


In [15]:
team_cols = df.loc[:, 'TEAM_01':].columns.tolist()
team_cols_len = len(team_cols)

def create_team(x):
    y = []
    for i in team_cols:
        y.append(x[i])
    return y

df["TEAM LIST"] = df.apply(create_team, axis=1)

df.head(2)

Unnamed: 0,DECADE,IMPORTANCE,TEAM LIST,DESCRIPTION,YEAR,TYPE,STATUS,TYPOLOGY_01,REM?,TEAM_01,...,Unnamed: 100,Unnamed: 101,Unnamed: 102,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109
0,1970s,1.0,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",Delirious New York,1978,Publication,Publication,,True,,...,,,,,,,,,,
1,1970s,0.333333,"[Zaha Hadid, Richard Perlemutter, Ron Steiner,...",Dutch Parliament Extension,1978,Project,Competition,Office,True,Zaha Hadid,...,,,,,,,,,,


In [23]:
team_list = df["TEAM LIST"].tolist()

def find_names(name):
    j = []

    for i in team_list:
        j.append(name in i)
    return j

def name_frame(name):
    p = find_names(name)
    q = df[p]
    return q


In [24]:
# Load name list
name_file = 'key-people-list.txt'
file_open = open(name_file)
c = file_open.readlines()
name_list = []
for i in c:
    y = i.strip()
    name_list.append(y)
    
print name_list

['Generation-01_Christian Rapp', 'Generation-01_Edzo Bindels', 'Generation-01_Christophe Cornubert', 'Generation-01_Matthias Sauerbruch', 'Generation-01_Kees Christiaanse', 'Generation-01_Yushi Uehara', 'Generation-01_Zaha Hadid', 'Generation-01_Laurinda Spear', 'Generation-01_Ruurd Roorda', 'Generation-01_Williem Jan Neutelings', 'Generation-01_Winy Maas', 'Generation-01_Jacob Van Rijs', 'Generation-01_Sarah Whiting', 'Generation-01_Ron Witte', 'Generation-01_Mike Guyer', 'Generation-01_Alejandro Zaera Polo', 'Generation-01_Luc Reuse', 'Generation-01_Farshid Moussavi', 'Generation-01_Rients Dijkstra', 'Generation-02_Jeffery Inaba', 'Generation-02_Xaveer de Geyter', 'Generation-02_Amale Andraos', 'Generation-02_Dan Wood', 'Generation-02_Juliette Bekkering', 'Generation-02_John Mcmorrough', 'Generation-02_Matthias Hollwich', 'Generation-02_Jeanne Gang', 'Generation-02_Fernando Romero', 'Generation-02_Markus Schaefer', 'Generation-02_Zachary R.Heineman', 'Generation-02_Joshua Prince-Ramu

In [28]:
def process_names(d):
    e = d.split('_')
    f = name_frame(e[1])
    f["BRANCH KEY"] = e[1]
    f["KEY LIST"] = e[0]
    save_path = "people_output/" + d + ".csv"
    f.to_csv(save_path, index=False)
    print "Saved: " + save_path

count = 0
for g in name_list:
    process_names(g)
    count += 1
    
print "Processed: " + count

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Saved: people_output/Generation-01_Christian Rapp.csv
Saved: people_output/Generation-01_Edzo Bindels.csv
Saved: people_output/Generation-01_Christophe Cornubert.csv
Saved: people_output/Generation-01_Matthias Sauerbruch.csv
Saved: people_output/Generation-01_Kees Christiaanse.csv
Saved: people_output/Generation-01_Yushi Uehara.csv
Saved: people_output/Generation-01_Zaha Hadid.csv
Saved: people_output/Generation-01_Laurinda Spear.csv
Saved: people_output/Generation-01_Ruurd Roorda.csv
Saved: people_output/Generation-01_Williem Jan Neutelings.csv
Saved: people_output/Generation-01_Winy Maas.csv
Saved: people_output/Generation-01_Jacob Van Rijs.csv
Saved: people_output/Generation-01_Sarah Whiting.csv
Saved: people_output/Generation-01_Ron Witte.csv
Saved: people_output/Generation-01_Mike Guyer.csv
Saved: people_output/Generation-01_Alejandro Zaera Polo.csv
Saved: people_output/Generation-01_Luc Reuse.csv
Saved: people_output/Generation-01_Farshid Moussavi.csv
Saved: people_output/Generat