In [2]:
import pandas as pd; 
import numpy as np;
from datetime import datetime, date;
import json;

# Functions

In [22]:
#Function to merge intervals of time. From https://stackoverflow.com/questions/58570094/pandas-sum-time-interval-in-a-group-excluding-overlaps
def merge_intervals(intervals):
    sorted_by_lower_bound = sorted(intervals, key=lambda tup: tup[0])
    merged = []

    for higher in sorted_by_lower_bound:
        if not merged:
            merged.append(higher)
        else:
            lower = merged[-1]
            # test for intersection between lower and higher:
            # we know via sorting that lower[0] <= higher[0]
            if higher[0] <= lower[1]:
                upper_bound = max(lower[1], higher[1])
                merged[-1] = (lower[0], upper_bound)  # replace by merged interval
            else:
                merged.append(higher)
    return merged

In [None]:
#-------------------------------------------------Example----------------------------------------------
ids = [x for x in range(10)]
group = [0, 1, 1, 2, 2, 3, 4, 4, 4, 4]

start = pd.to_datetime(["2019-10-21-16:20:00", "2019-10-21-16:22:00", "2019-10-21-16:22:00", "2019-10-21-16:15:00",
         "2019-10-21-16:22:00", "2019-10-21-16:58:00", "2019-10-21-17:02:00", "2019-10-21-17:03:00",
         "2019-10-21-17:04:00", "2019-10-21-17:20:00"])

end = pd.to_datetime(["2019-10-21-16:25:00", "2019-10-21-16:24:00", "2019-10-21-16:24:00", "2019-10-21-16:18:00",
       "2019-10-21-16:26:00", "2019-10-21-17:02:00", "2019-10-21-17:06:00", "2019-10-21-17:07:00",
       "2019-10-21-17:08:00", "2019-10-21-17:22:00"])

cols = ["id", "group", "start", "end"]


df = pd.DataFrame(dict(zip(cols, [ids, group, start, end])))
print(df.head())
# Apply above function
df['dt'] = df[['start', 'end']].apply(tuple, axis=1) #Create a tuple (Date1,Date2)
op = df.groupby(['group'])['dt'].apply(list)  #Create of all intervals grouped.
f_op = op.apply(merge_intervals)   #Use above function to create a grouped list of min,max of each overlaped interval.

op_d = f_op.apply(lambda x: sum([(y[1]-y[0]).seconds for y in x]))
#############################################################################################################################

# Data

## Scenes

In [6]:
df_raw=pd.read_csv("scenes.csv",encoding = "ISO-8859-1",header=1,sep=";")
# dtype={'Selection starts': np.datetime64, 'Selection ends': np.datetime64}

In [7]:
df=df_raw.copy()
df.dtypes

scene               float64
scene_length        float64
description          object
location             object
sublocation          object
character            object
Selection starts     object
Selection ends       object
Diff.                object
State                object
Movement             object
Note                 object
dtype: object

In [8]:
df

Unnamed: 0,scene,scene_length,description,location,sublocation,character,Selection starts,Selection ends,Diff.,State,Movement,Note
0,0.0,45.0,initial Credits,,,,0:00:00,0:00:45,0:00:45,,,
1,1.0,,Family Introduction -No Wifi,Kim's House,Living room,Ki Woo,0:00:45,0:01:40,0:00:55,,,
2,1.0,,Family Introduction -No Wifi,Kim's House,Corridor,Ki Woo,0:01:40,0:02:10,0:00:30,,,
3,1.0,,Ki Taek quicks insect,Kim's House,Living room,Ki Woo,0:02:14,0:02:20,0:00:06,,,
4,1.0,,,Kim's House,Corridor,Ki Woo,0:02:20,0:02:24,0:00:04,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
507,10.0,,Ending scene,Kim's House,Living room,Ki Woo,2:06:24,2:06:55,0:00:31,,,
508,11.0,,Credits,,,,2:06:55,2:12:00,0:05:05,,,
509,,,,,,,,,,,,
510,,,,,,,,,,,,


### Preprocessing

In [None]:
#df[['Selection starts','Selection ends']]=df[['Selection starts','Selection ends']].astype(str)
#df['Selection starts'].str.split('m').str[0]

In [9]:
#Delete excess rows
df.dropna(axis=0,how="all",inplace=True)
#Create Datetimes
df[['Selection starts','Selection ends']]=df[['Selection starts','Selection ends']].apply(pd.to_datetime)
#Create Date Difference column
df['difference']=df['Selection ends']-df['Selection starts']
df['difference']=df['difference'].dt.seconds

## Characters

In [None]:
ch_raw=pd.read_csv("characters.csv",encoding = "ISO-8859-1",sep=";")

In [None]:
ch=ch_raw.copy()

# Scenes

## Force layout

In [30]:
#Filter Park House
df["location"].value_counts()
df_filt=df[df["location"]=="Park's House"]
df_filt["sublocation"]=df_filt["sublocation"].str.lower()
#df_filt.head()
#All the duration of characters in the same scene+sublocation should be grouped.
df_gr=df_filt.groupby(["scene","character","sublocation"],as_index=False).agg({"difference":"sum"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [None]:
df_gr.head(50)
print(df_gr.dtypes)
df_gr.difference.describe()

In [31]:
#replace some characters to match the name of the center elements in the svg
df_gr["sublocation"].replace(" ",'_',regex=True,inplace=True)
df_gr["sublocation"].replace('&','',regex=True,inplace=True)
df_gr["sublocation"].replace('\\._','_',regex=True,inplace=True)
df_gr["sublocation"].replace("'",'_',regex=True,inplace=True)

In [32]:
#This is the first time we use the merge_intervals function. There might be shorter ways of calculating it. I just wanted to
#use this method.
df_filt['dt_sub'] = df_filt[['Selection starts', 'Selection ends']].apply(tuple, axis=1) #Create a tuple (Date1,Date2)
scene_selection = df_filt.groupby(["scene"])['dt_sub'].apply(list)  #Create of all intervals grouped.
f_scene_selection = scene_selection.apply(merge_intervals)   #Use above function to create a grouped list of min,max of each overlaped interval.

time_scene = pd.DataFrame(f_scene_selection.apply(lambda x: sum([(y[1]-y[0]).seconds for y in x])))
time_scene.reset_index(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [33]:
#check=df_filt[df_filt["sublocation"].isin(["garden","entrance","basement"])]
df_gr=df_gr[~df_gr["sublocation"].isin(["toilet","sauna_room"])]#,"entrance-intercom"
df_gr.sublocation.value_counts()

kitchen                   35
living_room               29
entrance                  29
garden                    23
entrance-stairs           18
entrance-stairs_garage    15
da_hye_s_room             14
entrance-street           13
basement                  13
first_floor-corridor      11
garage                    11
cave                      10
entrance-intercom          8
da_song_s_room             6
secondary_garden           4
mr__mrs_park_room          4
Name: sublocation, dtype: int64

In [34]:
#Merge df_gr with time_scene
df_gr=time_scene.merge(df_gr,on="scene")

In [35]:
df_gr.to_json("data_out/scenes.json",orient='records');
#check.to_json("data_out/scenes.json",orient='records');

## Movement

In [91]:
df_filt["Movement"]=df_filt["Movement"].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [148]:
#Filter specific movement
df_move= df_filt[df_filt["Movement"].str.contains("up")]
df_move=df_move.groupby(["scene","character"],as_index=False).agg({"difference":"sum"})
df_move.rename({"difference":"move_time"},axis=1,inplace=True)

In [149]:
#How much time does every character appears in a scene:
df_character_scene=df_filt.groupby(["scene","character"],as_index=False).agg({"difference":"sum"})
df_character_scene.rename({"difference":"scene_time"},axis=1,inplace=True)

In [150]:
df_move_mg=df_move.merge(df_character_scene,on=["scene","character"],how="right")
df_move_mg.fillna(0,inplace=True)
df_move_mg["pct"] = np.round(100*(df_move_mg["move_time"] / df_move_mg["scene_time"]),1)
df_move_mg.sort_values(["scene","move_time"],ascending=[True,False],inplace=True)
# To see how much time per scene a character is moving in a specific direction.
df_move_mg

Unnamed: 0,scene,character,move_time,scene_time,pct
0,3.0,Ki Woo,51.0,489,10.4
1,3.0,Yeon Kyo,8.0,394,2.0
28,3.0,Da Hye,0.0,87,0.0
29,3.0,Da Song,0.0,95,0.0
30,3.0,Moon Gwang,0.0,186,0.0
2,4.0,Da Hye,41.0,247,16.6
7,4.0,Moon Gwang,28.0,102,27.5
5,4.0,Ki Jung,22.0,272,8.1
6,4.0,Ki Woo,22.0,271,8.1
3,4.0,Da Song,17.0,63,27.0


In [152]:
df_move_mg.sort_values(["scene","pct"],ascending=[True,False],inplace=True)
#df_move_mg

Unnamed: 0,scene,character,move_time,scene_time,pct
0,3.0,Ki Woo,51.0,489,10.4
1,3.0,Yeon Kyo,8.0,394,2.0
28,3.0,Da Hye,0.0,87,0.0
29,3.0,Da Song,0.0,95,0.0
30,3.0,Moon Gwang,0.0,186,0.0
7,4.0,Moon Gwang,28.0,102,27.5
3,4.0,Da Song,17.0,63,27.0
8,4.0,Mr. Park,8.0,32,25.0
2,4.0,Da Hye,41.0,247,16.6
4,4.0,First chaufer,4.0,30,13.3


In [153]:
df_move_film=df_move_mg.groupby("character",as_index=False).agg({"move_time":"sum","scene_time":"sum"})
df_move_film["pct"] = np.round(100*(df_move_film["move_time"] / df_move_film["scene_time"]),1)
df_move_film.sort_values(["move_time"],ascending=False,inplace=True)
df_move_film

Unnamed: 0,character,move_time,scene_time,pct
4,Geun Se,480.0,1491,32.2
7,Ki Woo,239.0,3271,7.3
8,Moon Gwang,140.0,1832,7.6
11,Yeon Kyo,66.0,2511,2.6
6,Ki Taek,53.0,3047,1.7
1,Da Hye,49.0,1101,4.5
5,Ki Jung,22.0,2956,0.7
2,Da Song,21.0,1140,1.8
9,Mr. Park,21.0,1273,1.6
10,Party attendants,12.0,664,1.8


In [134]:
df_move_film.sort_values(["pct"],ascending=False,inplace=True)
df_move_film

Unnamed: 0,character,move_time,scene_time,pct
10,Party attendants,664.0,664,100.0
4,Geun Se,1485.0,1491,99.6
9,Mr. Park,1184.0,1273,93.0
8,Moon Gwang,1676.0,1832,91.5
3,First chaufer,26.0,30,86.7
2,Da Song,935.0,1140,82.0
11,Yeon Kyo,1946.0,2511,77.5
0,Chung Sook,1720.0,2275,75.6
6,Ki Taek,2290.0,3047,75.2
7,Ki Woo,2166.0,3271,66.2


In [None]:
#Alternative
pat = 'up'
grouped = df_filt.groupby(df_filt.columns.str.extract(pat, expand=False), axis=1)

In [67]:
df_filt.groupby(["scene","character","Movement"],as_index=False).agg({"difference":"sum"})

Unnamed: 0,scene,character,Movement,difference
0,3.0,Da Hye,Static,87
1,3.0,Da Song,Down,9
2,3.0,Da Song,Move,86
3,3.0,Ki Woo,Move,103
4,3.0,Ki Woo,"Move,Down",25
...,...,...,...,...
175,10.0,Ki Taek,Up,10
176,10.0,Ki Taek,Up,13
177,10.0,Ki Woo,Move,79
178,10.0,Ki Woo,"Move,Up",7


In [65]:
df_filt.groupby(["scene","character","sublocation","Movement"],as_index=False).agg({"difference":"sum"})

Unnamed: 0,scene,character,sublocation,Movement,difference
0,3.0,Da Hye,da hye's room,Static,87
1,3.0,Da Song,entrance,Move,17
2,3.0,Da Song,entrance-stairs,Down,9
3,3.0,Da Song,garden,Move,34
4,3.0,Da Song,living room,Move,35
...,...,...,...,...,...
282,10.0,Ki Taek,kitchen,Move,49
283,10.0,Ki Taek,living room,Move,14
284,10.0,Ki Woo,entrance-intercom,"Up,Static",21
285,10.0,Ki Woo,entrance-street,"Move,Up",7


In [62]:
df_filt.head()

Unnamed: 0,scene,scene_length,description,location,sublocation,character,Selection starts,Selection ends,Diff.,State,Movement,Note,difference,dt_sub
46,3.0,,Ki Woo Arrives at Park's House,Park's House,entrance-street,Ki Woo,2020-11-04 00:12:18,2020-11-04 00:12:33,0:00:15,,"Move,Up",,15,"(2020-11-04 00:12:18, 2020-11-04 00:12:33)"
47,3.0,,Ki Woo Arrives at Park's House,Park's House,entrance-intercom,Ki Woo,2020-11-04 00:12:33,2020-11-04 00:13:01,0:00:28,,Up,,28,"(2020-11-04 00:12:33, 2020-11-04 00:13:01)"
48,3.0,,Ki Woo Arrives at Park's House,Park's House,entrance,Ki Woo,2020-11-04 00:13:01,2020-11-04 00:13:52,0:00:51,,Move,,51,"(2020-11-04 00:13:01, 2020-11-04 00:13:52)"
49,3.0,,Ki Woo Arrives at Park's House,Park's House,entrance,Moon Gwang,2020-11-04 00:13:19,2020-11-04 00:13:52,0:00:33,,Move,,33,"(2020-11-04 00:13:19, 2020-11-04 00:13:52)"
50,3.0,,Ki Woo Arrives at Park's House+ Interview,Park's House,kitchen,Ki Woo,2020-11-04 00:13:52,2020-11-04 00:15:35,0:01:43,,"Move,Static",,103,"(2020-11-04 00:13:52, 2020-11-04 00:15:35)"


# Locations in the film

Due to the DB schema there are moments in the film where characters are in different locations and counted in both of them. Though the Date of an opening character in a scene and the closing one are the first and last moments in which the location appears. 

In [317]:
df_loc=df.copy()
df_loc.head(10)

In [325]:
#First method using the dissapeared locationid attribute =IF(E4=E3;D3;D3+1)
df_loc_gr=df_loc.groupby(["locationid","location"],as_index=False).agg({"Selection starts":"first","Selection ends":"last"})
df_loc_gr['difference_loc']=df_loc_gr['Selection ends']-df_loc_gr['Selection starts']
#df['difference_loc']=df['difference_loc'].dt.seconds
pd.set_option('display.max_rows', 73)
#df_loc_gr.difference_loc.sum() #To check whether the duration is similar to the film one.
df_loc_gr.head()

Unnamed: 0,locationid,location,Selection starts,Selection ends,difference_loc
0,1.0,Kim's House,2020-11-02 00:00:45,2020-11-02 00:04:18,00:03:33
1,2.0,Street by Kim's house,2020-11-02 00:04:18,2020-11-02 00:05:46,00:01:28
2,3.0,Kim's House,2020-11-02 00:05:46,2020-11-02 00:08:04,00:02:18
3,4.0,Street by Kim's house,2020-11-02 00:06:15,2020-11-02 00:06:51,00:00:36
4,5.0,Kim's House,2020-11-02 00:04:38,2020-11-02 00:04:44,00:00:06


In [326]:
df_loc_sum=df_loc_gr.groupby(["location"]).agg({"difference_loc":"sum"})
df_loc_sum["pct"] = df_loc_sum.apply(lambda x:100 * x / x.sum())

df_loc_sum.sort_values("difference_loc",ascending=False)

Unnamed: 0_level_0,difference_loc,pct
location,Unnamed: 1_level_1,Unnamed: 2_level_1
Park's House,01:25:38,66.236947
Kim's House,00:14:38,11.318809
Mr.Park's car,00:07:57,6.149285
Stadium,00:04:05,3.158438
Street by Kim's house,00:03:44,2.887714
Grocery shop,00:03:01,2.333376
Seoul streets,00:02:53,2.230244
Hospital,00:01:23,1.070001
Shopping Mall,00:01:13,0.941085
Buffet restaurant,00:01:10,0.902411


In [425]:
#Second method. Without locationid
df_loc['dt'] = df_loc[['Selection starts', 'Selection ends']].apply(tuple, axis=1) #Create a tuple (Date1,Date2)
list_selection = df_loc.groupby(["location"])['dt'].apply(list)  #Create of all intervals grouped.
f_list_selection = list_selection.apply(merge_intervals)   #Use above function to create a grouped list of min,max of each overlaped interval.

time_locat = pd.DataFrame(f_list_selection.apply(lambda x: sum([(y[1]-y[0]).seconds for y in x])))
time_locat.reset_index(inplace=True)

In [426]:
time_loc=time_locat.groupby("location").agg({"dt":"sum"})
time_loc["pct"] = time_loc.apply(lambda x:np.round(100 * x / x.sum(),1))
#time_loc
time_loc=time_loc.reset_index().sort_values("pct",ascending=False)
time_loc

Unnamed: 0,location,dt,pct
12,Park's House,5259,65.8
9,Kim's House,884,11.1
11,Mr.Park's car,477,6.0
17,Stadium,245,3.1
18,Street by Kim's house,224,2.8
6,Grocery shop,181,2.3
15,Seoul streets,173,2.2
10,Mr's Park Office,145,1.8
7,Hospital,83,1.0
16,Shopping Mall,73,0.9


In [427]:
#Grouping similar numbers.
time_loc.loc[~time_loc.location.isin(["Park's House","Kim's House","Mr.Park's car","Street by Kim's house","Stadium"]),"location"]="Other Seoul locations"
time_loc.loc[time_loc.location.isin(["Kim's House","Street by Kim's house"]),"location"]="Kim's House"

In [429]:
time_loc=time_loc.groupby("location",as_index=True).agg({"pct":"sum"})
time_loc

## Sublocations in Park House

In [26]:
df_loc_filt=df[df["location"]=="Park's House"]
df_loc_filt_dup=df_loc_filt.copy()

In [388]:
#print(df_loc_filt.shape)
#Actually not really needed with the function used above.
#df_loc_filt_dup=df_loc_filt.drop_duplicates(subset=['locationid', 'sublocation','Selection starts','Selection ends'], keep='last')
#print(df_loc_filt_dup.shape)


In [206]:
#First approach discarded: Create intervals
#df_loc_filt_dup["interval"]=[pd.Interval(j["Selection starts"],j["Selection ends"],closed='both') for i,j in df_loc_filt_dup.iterrows()]
#df_loc_filt_dup["interval"][46].overlaps(df_loc_filt_dup["interval"][47])
#df_loc_filt_dup.head(20)

In [27]:
#The dataset created locates each character in the point that they are staying
#(when this information is provided) rather than their time in screen.
df_loc_filt_dup.sort_values("difference",ascending=False).head(20)

Unnamed: 0,scene,scene_length,description,locationid,location,sublocation,character,Selection starts,Selection ends,Diff.,State,Movement,Note,Unnamed: 13,difference
318,7.0,,Kim family attack + peach inoculation,48.0,Park's House,Living room,Ki Jung,2020-11-03 01:15:11,2020-11-03 01:30:52,0:15:41,,"Move,Static",,,941
441,8.0,,Kevin and Da Hye: Do i fit here?,60.0,Park's House,Garden,Party attendants,2020-11-03 01:45:09,2020-11-03 01:56:01,0:10:52,,"Move,Static",,,652
442,8.0,,Kevin and Da Hye: Do i fit here?,60.0,Park's House,Garden,Da Song,2020-11-03 01:45:09,2020-11-03 01:56:01,0:10:52,We don't see him but know he is there.,"Static,Move",,,652
445,8.0,,Dressing as indians to surprise to Da Song,60.0,Park's House,Garden,Ki Taek,2020-11-03 01:46:50,2020-11-03 01:56:01,0:09:11,,"Static,Move",,,551
284,7.0,,Drinking+ someone calls,48.0,Park's House,Living room,Ki Woo,2020-11-03 00:56:31,2020-11-03 01:05:07,0:08:36,,Static,,,516
283,7.0,,Drinking+ someone calls,48.0,Park's House,Living room,Ki Taek,2020-11-03 00:56:31,2020-11-03 01:05:07,0:08:36,,Static,,,516
285,7.0,,Drinking+ someone calls,48.0,Park's House,Living room,Ki Jung,2020-11-03 00:56:31,2020-11-03 01:05:07,0:08:36,,Static,,,516
446,8.0,,Dressing as indians to surprise to Da Song,60.0,Park's House,Garden,Mr. Park,2020-11-03 01:46:50,2020-11-03 01:55:10,0:08:20,death,"Static,Move",,,500
332,7.0,,Preparing Ram Don + locking Geun Se and Moon G...,50.0,Park's House,Cave,Geun Se,2020-11-03 01:16:54,2020-11-03 01:24:14,0:07:20,When Ki Taek leaves the cave. 1:49:21 till las...,"Down,Move,Static",,,440
344,7.0,,Bringing back diary to Da Hye + hiding in her ...,50.0,Park's House,Da Hye's room,Ki Woo,2020-11-03 01:17:53,2020-11-03 01:24:55,0:07:02,,"Move,Static",,,422


In [28]:
df_loc_filt_dup['dt_sub'] = df_loc_filt_dup[['Selection starts', 'Selection ends']].apply(tuple, axis=1) #Create a tuple (Date1,Date2)
list_selection = df_loc_filt_dup.groupby(["sublocation"])['dt_sub'].apply(list)  #Create of all intervals grouped.
f_list_selection = list_selection.apply(merge_intervals)   #Use above function to create a grouped list of min,max of each overlaped interval.

time_locat = pd.DataFrame(f_list_selection.apply(lambda x: sum([(y[1]-y[0]).seconds for y in x])))
time_locat.reset_index(inplace=True)

In [29]:
time_locat.sort_values("dt_sub",ascending=False)

Unnamed: 0,sublocation,dt_sub
13,Living room,1941
12,Kitchen,1439
1,Cave,1168
11,Garden,1025
2,Da Hye's room,858
4,Entrance,421
0,Basement,337
5,Entrance-intercom,218
8,Entrance-street,166
6,Entrance-stairs,155


In [31]:
time_subloc=time_locat.groupby("sublocation").agg({"dt_sub":"sum"})
time_subloc["pct_sub"] = time_subloc.apply(lambda x:np.round(100 * x / x.sum(),1))
#time_subloc
time_subloc=time_subloc.reset_index().sort_values("pct_sub",ascending=False)
time_subloc["location"]="Park's House"

In [32]:
time_subloc

Unnamed: 0,sublocation,dt_sub,pct_sub,location
13,Living room,1941,23.7,Park's House
12,Kitchen,1439,17.5,Park's House
1,Cave,1168,14.2,Park's House
11,Garden,1025,12.5,Park's House
2,Da Hye's room,858,10.5,Park's House
4,Entrance,421,5.1,Park's House
0,Basement,337,4.1,Park's House
5,Entrance-intercom,218,2.7,Park's House
8,Entrance-street,166,2.0,Park's House
6,Entrance-stairs,155,1.9,Park's House


## Merge

In [433]:
time_loc_merg=time_loc.merge(time_subloc,on="location",how="left")
time_loc_merg

Unnamed: 0,location,pct,sublocation,dt_sub,pct_sub
0,Kim's House,13.9,,,
1,Mr.Park's car,6.0,,,
2,Other Seoul locations,11.5,,,
3,Park's House,65.8,Living room,1941.0,23.7
4,Park's House,65.8,Kitchen,1439.0,17.5
5,Park's House,65.8,Cave,1168.0,14.2
6,Park's House,65.8,Garden,1025.0,12.5
7,Park's House,65.8,Da Hye's room,858.0,10.5
8,Park's House,65.8,Entrance,421.0,5.1
9,Park's House,65.8,Basement,337.0,4.1


## Heat ocupation floor map

In [39]:
df_loc_filt=df[df["location"]=="Park's House"]

In [40]:
print(df_loc_filt["Selection ends"][46])
print((df_loc_filt["Selection ends"].max()-df_loc_filt["Selection starts"].min()))
print((df_loc_filt["Selection ends"].max()-df_loc_filt["Selection starts"].min()).seconds)


2020-11-04 00:12:33
0 days 01:54:06
6846


In [41]:
#Transforming the selection end into seconds
df_loc_filt["end_seconds"]=(df_loc_filt["Selection ends"].dt.hour*3600)+(df_loc_filt["Selection ends"].dt.minute*60)+(df_loc_filt["Selection ends"].dt.second)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [42]:
mini=738 #from df_loc_filt["Selection starts"].min()
duration=(df_loc_filt["Selection ends"].max()-df_loc_filt["Selection starts"].min()).seconds
duration

6846

In [43]:
df_loc_filt["end_seconds_norm"]=np.round(100*((df_loc_filt["end_seconds"]-mini)/(duration)),0)
df_loc_filt

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,scene,scene_length,description,location,sublocation,character,Selection starts,Selection ends,Diff.,State,Movement,Note,difference,end_seconds,end_seconds_norm
46,3.0,,Ki Woo Arrives at Park's House,Park's House,Entrance-street,Ki Woo,2020-11-04 00:12:18,2020-11-04 00:12:33,0:00:15,,"Move,Up",,15,753,0.0
47,3.0,,Ki Woo Arrives at Park's House,Park's House,Entrance-intercom,Ki Woo,2020-11-04 00:12:33,2020-11-04 00:13:01,0:00:28,,Up,,28,781,1.0
48,3.0,,Ki Woo Arrives at Park's House,Park's House,Entrance,Ki Woo,2020-11-04 00:13:01,2020-11-04 00:13:52,0:00:51,,Move,,51,832,1.0
49,3.0,,Ki Woo Arrives at Park's House,Park's House,Entrance,Moon Gwang,2020-11-04 00:13:19,2020-11-04 00:13:52,0:00:33,,Move,,33,832,1.0
50,3.0,,Ki Woo Arrives at Park's House+ Interview,Park's House,Kitchen,Ki Woo,2020-11-04 00:13:52,2020-11-04 00:15:35,0:01:43,,"Move,Static",,103,935,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502,10.0,,Ki Woo becomes rich,Park's House,Basement,Ki Taek,2020-11-04 02:05:15,2020-11-04 02:05:28,0:00:13,,Up,,13,7528,99.0
503,10.0,,Ki Woo becomes rich,Park's House,Kitchen,Ki Taek,2020-11-04 02:05:28,2020-11-04 02:05:47,0:00:19,,Move,,19,7547,99.0
504,10.0,,Ki Woo becomes rich,Park's House,Entrance,Ki Taek,2020-11-04 02:05:47,2020-11-04 02:05:54,0:00:07,,Move,,7,7554,100.0
505,10.0,,Ki Woo becomes rich,Park's House,Living room,Ki Taek,2020-11-04 02:05:54,2020-11-04 02:06:08,0:00:14,,Move,,14,7568,100.0


In [44]:
df_loc_filt['dt_sub_sl'] = df_loc_filt[['Selection starts', 'Selection ends']].apply(tuple, axis=1) #Create a tuple (Date1,Date2)
list_slider= df_loc_filt.groupby(["end_seconds_norm","sublocation"])['dt_sub_sl'].apply(list)  #Create of all intervals grouped.
f_list_slider = list_slider.apply(merge_intervals)   #Use above function to create a grouped list of min,max of each overlaped interval.

time_slider = pd.DataFrame(f_list_slider.apply(lambda x: sum([(y[1]-y[0]).seconds for y in x])))
time_slider.reset_index(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [55]:
time_slider_gr=time_slider.groupby(["end_seconds_norm","sublocation"]).agg({"dt_sub_sl":"sum"})#.unstack(fill_value=0).stack()
#Not needed a pctg column
#time_slider_gr["pct_slider"] = time_slider_gr.groupby(level=0).apply(lambda x:np.round(100 * (x / x.sum()),1))
time_slider_gr.reset_index(inplace=True)

In [56]:
time_slider_gr["sublocation"]=time_slider_gr["sublocation"].str.lower()
#replace some characters to match the name of the center elements in the svg
time_slider_gr["sublocation"].replace(" ",'_',regex=True,inplace=True)
time_slider_gr["sublocation"].replace('&','',regex=True,inplace=True)
time_slider_gr["sublocation"].replace('\\._','_',regex=True,inplace=True)
time_slider_gr["sublocation"].replace("'",'_',regex=True,inplace=True)

In [57]:
time_slider_gr=time_slider_gr[~time_slider_gr["sublocation"].isin(["toilet","sauna_room"])]#,"entrance-intercom"
time_slider_gr.sublocation.value_counts()

kitchen                   29
living_room               22
entrance                  19
entrance-stairs           15
garden                    14
basement                  13
entrance-street            9
cave                       9
da_hye_s_room              8
entrance-stairs_garage     8
first_floor-corridor       6
entrance-intercom          6
garage                     4
da_song_s_room             3
secondary_garden           2
mr__mrs_park_room          2
Name: sublocation, dtype: int64

In [59]:
#pd.set_option('display.max_rows', 10)
time_slider_gr.to_json("data_out/house-ocupation.json",orient='records');

# Characters Dataset

In [35]:
#ch.rename({"Link Image":"urlImage"},axis=1,inplace=True)
ch.dtypes
ch["id"]=ch["Character"].replace(" ",'_',regex=True)
ch.dropna(axis=0,subset=['urlImage'],inplace=True)

In [36]:
ch

Unnamed: 0,Character,Family,Actor,Role,urlImage,id
0,Ki Woo,Kim,Woo-sik Choi,English Teacher,https://github.com/javiersgdtu/javiersgvisual/...,Ki_Woo
1,Ki Jung,Kim,So-dam Park,Art Teacher,https://github.com/javiersgdtu/javiersgvisual/...,Ki_Jung
2,Ki Taek,Kim,Kang-ho Song,Chauffeur,https://github.com/javiersgdtu/javiersgvisual/...,Ki_Taek
3,Chung Sook,Kim,Hye-jin Jang,Housekeeper,https://raw.githubusercontent.com/javiersgdtu/...,Chung_Sook
4,Yeon Kyo,Park,Yeo-jeong Jo,Mother,https://github.com/javiersgdtu/javiersgvisual/...,Yeon_Kyo
5,Mr. Park,Park,Sun-kyun Lee,Father,https://github.com/javiersgdtu/javiersgvisual/...,Mr._Park
6,Da Hye,Park,Ji-so Jung,Daughter,https://github.com/javiersgdtu/javiersgvisual/...,Da_Hye
7,Da Song,Park,Hyun-jun Jung,Son,https://github.com/javiersgdtu/javiersgvisual/...,Da_Song
8,Moon Gwang,,Jeong-eun Lee,First Housekeeper,https://github.com/javiersgdtu/javiersgvisual/...,Moon_Gwang
9,Geun Se,,Myeong-hoon Park,First Housekeeper's husband,https://github.com/javiersgdtu/javiersgvisual/...,Geun_Se


In [37]:
ch.to_json("data_out/characters.json",orient='records');