In [5]:
import pandas as pd
import numpy as np
df_tournage = pd.read_csv("src/lieux-de-tournage-a-paris.csv", delimiter=';', on_bad_lines='skip')

# Drop unused :
df_tournage = df_tournage.drop(columns=["Identifiant du lieu","Localisation de la scène","Année du tournage","Coordonnée en X", "Coordonnée en Y","geo_shape"])

# Corrections Type :
df_tournage["Code postal"] = pd.to_numeric(df_tournage["Code postal"])
df_tournage = df_tournage.dropna(subset = ['Code postal'])
df_tournage["Code postal"] = df_tournage["Code postal"].astype(int)
df_tournage["Date de début"] = pd.to_datetime(df_tournage["Date de début"])
df_tournage["Date de fin"] = pd.to_datetime(df_tournage["Date de fin"])
df_tournage["latitude"] = pd.to_numeric( [ x.split(",")[0] for x in df_tournage["geo_point_2d"][:][:] ] )
df_tournage["longitude"] = pd.to_numeric( [ x.split(",")[1] for x in df_tournage["geo_point_2d"][:][:] ] )
df_tournage = df_tournage.drop(columns="geo_point_2d")

# Clear Name Columns :
df_tournage = df_tournage.rename(columns = {"Type de tournage":"type", "Code postal":"postal", "Date de début":"debut", "Date de fin":"fin"})
df_tournage.columns = [ x.lower() for x in df_tournage.columns.tolist() ] # ABC -> abc

# keep only films :
df_tournage = df_tournage[df_tournage["type"] == 'Long métrage']
df_tournage = df_tournage.drop(columns= ['type'])

df_tournage = df_tournage.reset_index().drop(columns=['index'])

df_tournage.head()

Unnamed: 0,titre,réalisateur,producteur,postal,debut,fin,latitude,longitude
0,TOUT S'EST BIEN PASSE,Francois OZON,MANDARIN PRODUCTION,75013,2020-08-20,2020-08-21,48.83566,2.348315
1,Une jeune fille qui va bien,Sandrine Kiberlain,CURIOSA FILMS,75004,2020-08-31,2020-09-01,48.854533,2.361694
2,French Exit,Azazel Jacobs,Same Player,75012,2019-12-04,2019-12-04,48.850067,2.376519
3,FIN DE MATINEE,Hiroshi NISHATANI,COMME DES CINEMAS,75004,2018-11-05,2018-11-05,48.854112,2.354679
4,HORS NORMES,Eric Toledano et Olivier Nakache,ADNP QUAD FILMS,75001,2018-11-05,2018-11-06,48.865744,2.327446


In [6]:
import requests
# https://www.w3schools.com/python/ref_requests_get.asp
# https://www.imdb.com/search/title/?title=TOUT+S%27EST+BIEN+PASSE&release_date=2015-01-01,2022-12-31

# Creating data that is usually called payload
payload = {'title': "TOUT S'EST BIEN PASSE", 'release_date': "2015-01-01,2022-12-31"}


# Getting data that includes payload as parameters
r = requests.get('https://www.imdb.com/search/title/', params=payload)
r

<Response [200]>

In [7]:
# r.status_code

# r.content

parameters = ('param', 'value')
base_url = 'https://www.imdb.com/search/title/'
query = ''.join((parameters[0], parameters[1]))
url = '/'.join((base_url, query))


In [8]:
# Creating data that is usually called payload
payload = {'title': "TOUT S'EST BIEN PASSE", 'release_date': "2015-01-01,2022-12-31"}

# https://www.imdb.com/search/title/?title=TOUT+S%27EST+BIEN+PASSE&release_date=2015-01-01,2022-12-31

import urllib.parse

base = "TOUT S'EST BIEN PASSE"
base = urllib.parse.quote(base)

base

'TOUT%20S%27EST%20BIEN%20PASSE'

In [9]:
url_base = 'https://www.imdb.com/search/title/?title='
param2 = '&release_date=2015-01-01,2022-12-31'

df_tournage['imdb_search'] = ''
# https://www.imdb.com/search/title/?title=TOUT+S%27EST+BIEN+PASSE&release_date=2015-01-01,2022-12-31

for i in range( len(df_tournage) ):

    title = urllib.parse.quote(df_tournage.iloc[i]['titre']).replace('%20', '+')

    df_tournage.imdb_search[i] = url_base + title + param2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tournage.imdb_search[i] = url_base + title + param2


In [10]:
df_tournage

Unnamed: 0,titre,réalisateur,producteur,postal,debut,fin,latitude,longitude,imdb_search
0,TOUT S'EST BIEN PASSE,Francois OZON,MANDARIN PRODUCTION,75013,2020-08-20,2020-08-21,48.835660,2.348315,https://www.imdb.com/search/title/?title=TOUT+...
1,Une jeune fille qui va bien,Sandrine Kiberlain,CURIOSA FILMS,75004,2020-08-31,2020-09-01,48.854533,2.361694,https://www.imdb.com/search/title/?title=Une+j...
2,French Exit,Azazel Jacobs,Same Player,75012,2019-12-04,2019-12-04,48.850067,2.376519,https://www.imdb.com/search/title/?title=Frenc...
3,FIN DE MATINEE,Hiroshi NISHATANI,COMME DES CINEMAS,75004,2018-11-05,2018-11-05,48.854112,2.354679,https://www.imdb.com/search/title/?title=FIN+D...
4,HORS NORMES,Eric Toledano et Olivier Nakache,ADNP QUAD FILMS,75001,2018-11-05,2018-11-06,48.865744,2.327446,https://www.imdb.com/search/title/?title=HORS+...
...,...,...,...,...,...,...,...,...,...
5827,On sourit pour la photo,François Uzan,RADAR FILMS,75015,2020-08-26,2020-08-27,48.847239,2.307775,https://www.imdb.com/search/title/?title=On+so...
5828,SOOF 3,ANNE DE CLERCQ,Soof BV P/a,75001,2019-11-20,2019-11-20,48.856448,2.342153,https://www.imdb.com/search/title/?title=SOOF+...
5829,BEFIKRE,ADITYA CHOPRA,FIRSTEP,75004,2016-02-26,2016-02-26,48.854714,2.354821,https://www.imdb.com/search/title/?title=BEFIK...
5830,MARIAGE BLANC,TAREK BOUDALI,AXEL FILMS,75005,2016-08-31,2016-08-31,48.844672,2.343855,https://www.imdb.com/search/title/?title=MARIA...


In [11]:
df_tournage.to_csv(r'export/df_tournage_with_URL.csv', index = False, header=True)