# **DATA TRANSFORMATION & MERGE.**

## **Libraries.**

In [58]:
import numpy as np
import pandas as pd
import re

## **Data transformation.**

### **Import clean datasets.**

In [59]:
# Load the seven clean datasets in one pass. The paths are listed in exactly
# the same order as the names they are unpacked into.
actors, categories, films, inventories, languages, hdd, rentals = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/actor_clean.csv',
        '../data/category_clean.csv',
        '../data/film_clean.csv',
        '../data/inventory_clean.csv',
        '../data/language_clean.csv',
        '../data/old_HDD_clean.csv',
        '../data/rental_clean.csv',
    )
)

### **Actors-Films merge.**

In [60]:
# Preview the actors dataframe (columns: actor_id, first_name, last_name).
actors

Unnamed: 0,actor_id,first_name,last_name
0,1,PENELOPE,GUINESS
1,2,NICK,WAHLBERG
2,3,ED,CHASE
3,4,JENNIFER,DAVIS
4,5,JOHNNY,LOLLOBRIGIDA
...,...,...,...
195,196,BELA,WALKEN
196,197,REESE,WEST
197,198,MARY,KEITEL
198,199,JULIA,FAWCETT


In [61]:
# Preview the old_HDD dataframe: it holds actor names and film titles,
# but neither actor_id nor film_id.
hdd

Unnamed: 0,first_name,last_name,title,release_year,category_id
0,PENELOPE,GUINESS,ACADEMY DINOSAUR,2006,6
1,PENELOPE,GUINESS,ANACONDA CONFESSIONS,2006,2
2,PENELOPE,GUINESS,ANGELS LIFE,2006,13
3,PENELOPE,GUINESS,BULWORTH COMMANDMENTS,2006,10
4,PENELOPE,GUINESS,CHEAPER CLYDE,2006,14
...,...,...,...,...,...
995,GOLDIE,BRODY,COMANCHEROS ENEMY,2006,3
996,GOLDIE,BRODY,DAISY MENAGERIE,2006,14
997,GOLDIE,BRODY,DESERT POSEIDON,2006,11
998,GOLDIE,BRODY,EVERYONE CRAFT,2006,9


In [62]:
# Preview the films dataframe; `title` is the column it shares with old_HDD.
films

Unnamed: 0,film_id,title,description,release_year,language,rental_duration,rental_rate,length,replacement_cost,rating,special_features
0,1,ACADEMY DINOSAUR,A Epic Drama of a Feminist And a Mad Scientist...,2006,English,6,0.99,86,20.99,PG,"Deleted Scenes,Behind the Scenes"
1,2,ACE GOLDFINGER,A Astounding Epistle of a Database Administrat...,2006,English,3,4.99,48,12.99,G,"Trailers,Deleted Scenes"
2,3,ADAPTATION HOLES,A Astounding Reflection of a Lumberjack And a ...,2006,English,7,2.99,50,18.99,NC-17,"Trailers,Deleted Scenes"
3,4,AFFAIR PREJUDICE,A Fanciful Documentary of a Frisbee And a Lumb...,2006,English,5,2.99,117,26.99,G,"Commentaries,Behind the Scenes"
4,5,AFRICAN EGG,A Fast-Paced Documentary of a Pastry Chef And ...,2006,English,6,2.99,130,22.99,G,Deleted Scenes
...,...,...,...,...,...,...,...,...,...,...,...
995,996,YOUNG LANGUAGE,A Unbelieveable Yarn of a Boat And a Database ...,2006,English,6,0.99,183,9.99,G,"Trailers,Behind the Scenes"
996,997,YOUTH KICK,A Touching Drama of a Teacher And a Cat who mu...,2006,English,4,0.99,179,14.99,NC-17,"Trailers,Behind the Scenes"
997,998,ZHIVAGO CORE,A Fateful Yarn of a Composer And a Man who mus...,2006,English,6,0.99,105,10.99,NC-17,Deleted Scenes
998,999,ZOOLANDER FICTION,A Fateful Reflection of a Waitress And a Boat ...,2006,English,5,2.99,101,28.99,R,"Trailers,Deleted Scenes"


**We will merge the "Actor" and "Film" dataframes through "old_HDD", since it shares columns with both of them: "first_name" and "last_name" with "Actor", and "title" with "Film".**

In [63]:
# Attach actor_id to each old_HDD row by matching on the actor's full name;
# the two dataframes share no id column, so the name pair is the join key.
merged_actors_hdd = pd.merge(actors, hdd, on=['first_name', 'last_name'], how='inner')

# A name-based inner join can go wrong silently: an old_HDD name missing from
# actors is dropped, and a name shared by two actors duplicates its rows.
# Either problem changes the row count, so check it before going further.
assert len(merged_actors_hdd) == len(hdd), (
    'actors/old_HDD merge changed the row count: '
    f'{len(hdd)} rows in old_HDD, {len(merged_actors_hdd)} rows after the merge'
)
merged_actors_hdd

Unnamed: 0,actor_id,first_name,last_name,title,release_year,category_id
0,1,PENELOPE,GUINESS,ACADEMY DINOSAUR,2006,6
1,1,PENELOPE,GUINESS,ANACONDA CONFESSIONS,2006,2
2,1,PENELOPE,GUINESS,ANGELS LIFE,2006,13
3,1,PENELOPE,GUINESS,BULWORTH COMMANDMENTS,2006,10
4,1,PENELOPE,GUINESS,CHEAPER CLYDE,2006,14
...,...,...,...,...,...,...
995,39,GOLDIE,BRODY,COMANCHEROS ENEMY,2006,3
996,39,GOLDIE,BRODY,DAISY MENAGERIE,2006,14
997,39,GOLDIE,BRODY,DESERT POSEIDON,2006,11
998,39,GOLDIE,BRODY,EVERYONE CRAFT,2006,9


In [64]:
# Look up the film columns for each actor/title row. how='left' keeps every
# row of merged_actors_hdd even when its title has no match in films.
# Both inputs carry release_year, hence release_year_x / release_year_y.
merged_actors_films = pd.merge(merged_actors_hdd, films, on='title', how='left')

# A title repeated in films would duplicate actor rows.
assert len(merged_actors_films) == len(merged_actors_hdd), (
    'films merge changed the row count: '
    f'{len(merged_actors_hdd)} rows in, {len(merged_actors_films)} rows out'
)
# An unmatched title leaves film_id as NaN (and turns the column into floats),
# which would corrupt the actor-film relationship exported later.
assert merged_actors_films['film_id'].notna().all(), (
    'some old_HDD titles have no matching film_id in films'
)
merged_actors_films

Unnamed: 0,actor_id,first_name,last_name,title,release_year_x,category_id,film_id,description,release_year_y,language,rental_duration,rental_rate,length,replacement_cost,rating,special_features
0,1,PENELOPE,GUINESS,ACADEMY DINOSAUR,2006,6,1,A Epic Drama of a Feminist And a Mad Scientist...,2006,English,6,0.99,86,20.99,PG,"Deleted Scenes,Behind the Scenes"
1,1,PENELOPE,GUINESS,ANACONDA CONFESSIONS,2006,2,23,A Lacklusture Display of a Dentist And a Denti...,2006,English,3,0.99,92,9.99,R,"Trailers,Deleted Scenes"
2,1,PENELOPE,GUINESS,ANGELS LIFE,2006,13,25,A Thoughtful Display of a Woman And a Astronau...,2006,English,3,2.99,74,15.99,G,Trailers
3,1,PENELOPE,GUINESS,BULWORTH COMMANDMENTS,2006,10,106,A Amazing Display of a Mad Cow And a Pioneer w...,2006,English,4,2.99,61,14.99,G,Trailers
4,1,PENELOPE,GUINESS,CHEAPER CLYDE,2006,14,140,A Emotional Character Study of a Pioneer And a...,2006,English,6,0.99,87,23.99,G,"Trailers,Commentaries,Behind the Scenes"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,39,GOLDIE,BRODY,COMANCHEROS ENEMY,2006,3,168,A Boring Saga of a Lumberjack And a Monkey who...,2006,English,5,0.99,67,23.99,R,"Trailers,Behind the Scenes"
996,39,GOLDIE,BRODY,DAISY MENAGERIE,2006,14,203,A Fast-Paced Saga of a Pastry Chef And a Monke...,2006,English,5,4.99,84,9.99,G,"Trailers,Commentaries,Behind the Scenes"
997,39,GOLDIE,BRODY,DESERT POSEIDON,2006,11,222,A Brilliant Documentary of a Butler And a Fris...,2006,English,4,4.99,64,27.99,R,"Trailers,Behind the Scenes"
998,39,GOLDIE,BRODY,EVERYONE CRAFT,2006,9,290,A Fateful Display of a Waitress And a Dentist ...,2006,English,4,0.99,163,29.99,PG,"Trailers,Commentaries"


**We will keep only the "actor_id" and "film_id" columns, so that this dataframe can serve as the relationship table between actors and films in the database.**

In [65]:
# Keep all rows but only the two key columns that link an actor to a film.
merged_actors_films = merged_actors_films.loc[:, ['actor_id', 'film_id']]
merged_actors_films

Unnamed: 0,actor_id,film_id
0,1,1
1,1,23
2,1,25
3,1,106
4,1,140
...,...,...
995,39,168
996,39,203
997,39,222
998,39,290


**We export the resulting dataframe.**

In [66]:
# Write the actor-film pairs to CSV; index=False leaves the row index out of the file.
merged_actors_films.to_csv(path_or_buf='../data/actor_film_clean.csv', index=False)