In [425]:
import os
import json

json_folder = 'actors_by_country'
merged_data = {}

for file in os.listdir(json_folder):
    if file.endswith('.json'):
        file_path = os.path.join(json_folder, file)
        
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        
        for country, actors in data.items():
            if country not in merged_data:
                merged_data[country] = {}
            merged_data[country].update(actors)

with open('merged_actors.json', 'w', encoding='utf-8') as file_one:
    json.dump(merged_data, file_one, indent=4)


In [426]:
import pandas as pd
import re

In [427]:
df = pd.read_json('merged_actors.json')

df = df.stack().apply(pd.Series)
df.reset_index(inplace=True)
df = df.rename(columns={'level_0': 'actor_name', 'level_1': 'country'})

In [428]:
df.head()

Unnamed: 0,actor_name,country,aged_of_death,born_date,born_place,date_of_death,filmography,place_of_death,years_of_active
0,Agim Kaba (1980),Albania,,(1980-02-16),"February 16, 1980 (age 45)New York City, U.S.",,,,2001–present
1,Agim Qirjaqi (1950–2010),Albania,,,,,,,
2,Agim Shuka (1942–1992),Albania,50.0,29 April 1942,"Kuçovë, Albania",,,"Rome, Italy",1966–1992
3,Albert Vërria (1936–2015),Albania,78.0,(1936-09-03),"3 September 1936Fier, Kingdom of Albania",2015-08-17,"[Trapi i Vjetër - (2005), Misioni përtej detit...","Vlorë, Albania",
4,Aleko Prodani (1942–2006),Albania,,,,,,,


In [429]:
df.shape

(11268, 9)

In [430]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11268 entries, 0 to 11267
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   actor_name       11268 non-null  object
 1   country          11268 non-null  object
 2   aged_of_death    3576 non-null   object
 3   born_date        8856 non-null   object
 4   born_place       8835 non-null   object
 5   date_of_death    2848 non-null   object
 6   filmography      6134 non-null   object
 7   place_of_death   3232 non-null   object
 8   years_of_active  7679 non-null   object
dtypes: object(9)
memory usage: 792.4+ KB


# Data cleaning

## actor name feature

In [431]:
df['actor_name'].head(3)

0            Agim Kaba (1980)
1    Agim Qirjaqi (1950–2010)
2      Agim Shuka (1942–1992)
Name: actor_name, dtype: object

In [432]:
def take_actor_name(value):
    pattern = r'\(.*?\)'
    return re.sub(pattern, '', value)


In [433]:
df['name'] = df['actor_name'].apply(take_actor_name)
cols = list(df.columns)
cols.insert(1, cols.pop())
df = df[cols]

In [434]:
df[df['country'] == 'Bulgaria']

Unnamed: 0,actor_name,name,country,aged_of_death,born_date,born_place,date_of_death,filmography,place_of_death,years_of_active
921,Adriana Budevska,Adriana Budevska,Bulgaria,,,,,,,
922,Anjela Nedyalkova,Anjela Nedyalkova,Bulgaria,,(1991-03-02),"March 2, 1991 (age 33)Sofia, Bulgaria",,"[Eastern Plays, Avé, Bulgarian Rhapsody, The P...",,2009–present
923,Anton Gorchev,Anton Gorchev,Bulgaria,60,,,,,"Sofia, Bulgaria",1963–1998
924,Apostol Karamitev,Apostol Karamitev,Bulgaria,50,17 October 1923,"Burgas, Bulgaria",1973-11-09,"[Utro nad Rodinata (1951) as Velizarov, Pod ig...","Sofia, Bulgaria",
925,Asen Kisimov,Asen Kisimov,Bulgaria,69,(1936-05-03),"3 May 1936Plovdiv, Bulgaria",2005-07-13,,"Blagoevgrad, Bulgaria",1955–2005
...,...,...,...,...,...,...,...,...,...,...
981,Viktor Kalev,Viktor Kalev,Bulgaria,,,,,,,
982,Yana Marinova,Yana Marinova,Bulgaria,,(1978-08-17),"17 August 1978 (age 46)Sofia, Bulgaria",,"[Hotel ""Bulgaria"", Lake Placid 2, Lyudmil & Ro...",,2004–present
983,Yuriy Yakovlev,Yuriy Yakovlev,Bulgaria,71,(1930-10-05),"October 5, 1930Riga, Latvia",,"[The Past-Master, Gerlovo Event, Hitchhiking, ...","Sofia, Bulgaria",1955–2000
984,Zahari Baharov,Zahari Baharov,Bulgaria,,(1980-08-12),"August 12, 1980 (age 44)Sofia, Bulgaria",,"[Air Marshal, Criminal, The Rebel of L, War, I...",,2003–present


In [435]:
df.head(10)

Unnamed: 0,actor_name,name,country,aged_of_death,born_date,born_place,date_of_death,filmography,place_of_death,years_of_active
0,Agim Kaba (1980),Agim Kaba,Albania,,(1980-02-16),"February 16, 1980 (age 45)New York City, U.S.",,,,2001–present
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,,,,,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,50.0,29 April 1942,"Kuçovë, Albania",,,"Rome, Italy",1966–1992
3,Albert Vërria (1936–2015),Albert Vërria,Albania,78.0,(1936-09-03),"3 September 1936Fier, Kingdom of Albania",2015-08-17,"[Trapi i Vjetër - (2005), Misioni përtej detit...","Vlorë, Albania",
4,Aleko Prodani (1942–2006),Aleko Prodani,Albania,,,,,,,
5,Aleksandër Moisiu (1879–1935),Aleksandër Moisiu,Albania,55.0,(1879-04-02),"2 April 1879Trieste, Austria-Hungary",1935-03-22,,"Vienna, Austria",1899–1935
6,Ana Golja (1996),Ana Golja,Albania,,,,,"[En Vogue Christmas, Full Out: The Ariana Berl...",,2005–present
7,Andon Qesari (1942–2021),Andon Qesari,Albania,78.0,(1942-06-27),"27 June 1942Qeparo, Himarë, Albania",2021-02-13,"[Gadhnjim mbi vdekjen (1967), Gjurma (1970), N...",,
8,Antoneta Papapavli (1938–2013),Antoneta Papapavli,Albania,75.0,(1938-02-17),"17 February 1938Përmet, Kingdom of Albania",2013-09-13,,"Tirana, Albania",1964–1985
9,Bekim Fehmiu (1936–2010),Bekim Fehmiu,Albania,74.0,(1936-06-01),"1 June 1936Sarajevo, Kingdom of Yugoslavia (mo...",2010-06-15,"[Subotom uvece, Tu ne tueras point, Saša, Pod ...","Belgrade, Serbia",1953–1998


In [436]:
cleaned_df = df[['actor_name', 'name', 'country', 'born_date', 'born_place', 'years_of_active', 'filmography']]

In [437]:
cleaned_df

Unnamed: 0,actor_name,name,country,born_date,born_place,years_of_active,filmography
0,Agim Kaba (1980),Agim Kaba,Albania,(1980-02-16),"February 16, 1980 (age 45)New York City, U.S.",2001–present,
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,29 April 1942,"Kuçovë, Albania",1966–1992,
3,Albert Vërria (1936–2015),Albert Vërria,Albania,(1936-09-03),"3 September 1936Fier, Kingdom of Albania",,"[Trapi i Vjetër - (2005), Misioni përtej detit..."
4,Aleko Prodani (1942–2006),Aleko Prodani,Albania,,,,
...,...,...,...,...,...,...,...
11263,Zizi Strallen (born 1990),Zizi Strallen,United Kingdom,(1990-11-01),"1 November 1990 (age 34)[1]London, England",1995–present,"[The Snow Queen, The Ugly Duckling, Merrily We..."
11264,Zoe Boyle (born 1989),Zoe Boyle,United Kingdom,(1989-01-01),1 January 1989 (age 36),,"[Freeloaders, Promise at Dawn, Rémi sans famil..."
11265,Zoë Lister (born 1982),Zoë Lister,United Kingdom,,,2005–present,
11266,Zoë Wanamaker (born 1949) (born in New York Ci...,Zoë Wanamaker,United Kingdom,(1949-05-13),"13 May 1949 (age 75)[1][2]New York City, U.S.",1970–present,[]


## filmography

In [438]:
def clean_filmography(value):
    result = ', '.join(value) if value else None
    result = result.replace(' -', '') if result else None
    pattern = r'\s?\(.*?\)\s*-*'
    return re.sub(pattern, '', result) if result else None

In [439]:
cleaned_df['filmography'] = cleaned_df['filmography'].apply(clean_filmography)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_df['filmography'] = cleaned_df['filmography'].apply(clean_filmography)


In [440]:
cleaned_df

Unnamed: 0,actor_name,name,country,born_date,born_place,years_of_active,filmography
0,Agim Kaba (1980),Agim Kaba,Albania,(1980-02-16),"February 16, 1980 (age 45)New York City, U.S.",2001–present,
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,29 April 1942,"Kuçovë, Albania",1966–1992,
3,Albert Vërria (1936–2015),Albert Vërria,Albania,(1936-09-03),"3 September 1936Fier, Kingdom of Albania",,"Trapi i Vjetër, Misioni përtej detit, Në emër ..."
4,Aleko Prodani (1942–2006),Aleko Prodani,Albania,,,,
...,...,...,...,...,...,...,...
11263,Zizi Strallen (born 1990),Zizi Strallen,United Kingdom,(1990-11-01),"1 November 1990 (age 34)[1]London, England",1995–present,"The Snow Queen, The Ugly Duckling, Merrily We ..."
11264,Zoe Boyle (born 1989),Zoe Boyle,United Kingdom,(1989-01-01),1 January 1989 (age 36),,"Freeloaders, Promise at Dawn, Rémi sans famill..."
11265,Zoë Lister (born 1982),Zoë Lister,United Kingdom,,,2005–present,
11266,Zoë Wanamaker (born 1949) (born in New York Ci...,Zoë Wanamaker,United Kingdom,(1949-05-13),"13 May 1949 (age 75)[1][2]New York City, U.S.",1970–present,


In [441]:
cleaned_df['filmography'].unique()

array([None,
       'Trapi i Vjetër, Misioni përtej detit, Në emër të lirisë, Rrethimi i vogël, Melodi e pandërprerë, Ditë të qytetit tim, Goditja, Përtej mureve të gurta, "Koncert në vitin 1936", Nga mesi i errësirës, I treti, Thirrja, Në fillim të verës, Shtigje të luftës, Shpërthimi        , Operacioni zjarri   , Brazdat, Kapedani, Kur zbardhi një ditë, I teti në bronz[6]',
       'En Vogue Christmas, Full Out: The Ariana Berlin Movie, Taylor, Love On Ice, Crazy for the Boys, The Cuban, Leah, Social Industrial Meltdown, Off the Grid',
       ...,
       'The Snow Queen, The Ugly Duckling, Merrily We Roll Along, Cinderella, The Car Man, National Theatre Live: Follies, Cats',
       'Freeloaders, Promise at Dawn, Rémi sans famille, The Last Letter from Your Lover, Living',
       'Benefactor: Scenes from an Arrangement, Timecode, Slash, Hidalgo, The Merchant of Venice, The Namesake, Fist Full of Love, Ask Me Anything, The Boy, American Fable'],
      dtype=object)

## years of active

In [442]:
cleaned_df['years_of_active'].unique()

array(['2001–present', None, '1966–1992', ..., '1909–1945', '1925–1973',
       '1899–1969'], dtype=object)

### Born year

In [443]:
cleaned_df

Unnamed: 0,actor_name,name,country,born_date,born_place,years_of_active,filmography
0,Agim Kaba (1980),Agim Kaba,Albania,(1980-02-16),"February 16, 1980 (age 45)New York City, U.S.",2001–present,
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,29 April 1942,"Kuçovë, Albania",1966–1992,
3,Albert Vërria (1936–2015),Albert Vërria,Albania,(1936-09-03),"3 September 1936Fier, Kingdom of Albania",,"Trapi i Vjetër, Misioni përtej detit, Në emër ..."
4,Aleko Prodani (1942–2006),Aleko Prodani,Albania,,,,
...,...,...,...,...,...,...,...
11263,Zizi Strallen (born 1990),Zizi Strallen,United Kingdom,(1990-11-01),"1 November 1990 (age 34)[1]London, England",1995–present,"The Snow Queen, The Ugly Duckling, Merrily We ..."
11264,Zoe Boyle (born 1989),Zoe Boyle,United Kingdom,(1989-01-01),1 January 1989 (age 36),,"Freeloaders, Promise at Dawn, Rémi sans famill..."
11265,Zoë Lister (born 1982),Zoë Lister,United Kingdom,,,2005–present,
11266,Zoë Wanamaker (born 1949) (born in New York Ci...,Zoë Wanamaker,United Kingdom,(1949-05-13),"13 May 1949 (age 75)[1][2]New York City, U.S.",1970–present,


In [444]:
df.head(3)

Unnamed: 0,actor_name,name,country,aged_of_death,born_date,born_place,date_of_death,filmography,place_of_death,years_of_active
0,Agim Kaba (1980),Agim Kaba,Albania,,(1980-02-16),"February 16, 1980 (age 45)New York City, U.S.",,,,2001–present
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,,,,,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,50.0,29 April 1942,"Kuçovë, Albania",,,"Rome, Italy",1966–1992


In [445]:
def create_born_year_feature(value):
    match = re.search(r'\b(\d{4})\b', value) if value else None
    return match.group(1) if match else None

In [446]:
cleaned_df['born_year'] = cleaned_df['born_date'].apply(create_born_year_feature)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_df['born_year'] = cleaned_df['born_date'].apply(create_born_year_feature)


In [447]:
cleaned_df.head(10)

Unnamed: 0,actor_name,name,country,born_date,born_place,years_of_active,filmography,born_year
0,Agim Kaba (1980),Agim Kaba,Albania,(1980-02-16),"February 16, 1980 (age 45)New York City, U.S.",2001–present,,1980.0
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,,,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,29 April 1942,"Kuçovë, Albania",1966–1992,,1942.0
3,Albert Vërria (1936–2015),Albert Vërria,Albania,(1936-09-03),"3 September 1936Fier, Kingdom of Albania",,"Trapi i Vjetër, Misioni përtej detit, Në emër ...",1936.0
4,Aleko Prodani (1942–2006),Aleko Prodani,Albania,,,,,
5,Aleksandër Moisiu (1879–1935),Aleksandër Moisiu,Albania,(1879-04-02),"2 April 1879Trieste, Austria-Hungary",1899–1935,,1879.0
6,Ana Golja (1996),Ana Golja,Albania,,,2005–present,"En Vogue Christmas, Full Out: The Ariana Berli...",
7,Andon Qesari (1942–2021),Andon Qesari,Albania,(1942-06-27),"27 June 1942Qeparo, Himarë, Albania",,"Gadhnjim mbi vdekjen, Gjurma, Në fillim të ver...",1942.0
8,Antoneta Papapavli (1938–2013),Antoneta Papapavli,Albania,(1938-02-17),"17 February 1938Përmet, Kingdom of Albania",1964–1985,,1938.0
9,Bekim Fehmiu (1936–2010),Bekim Fehmiu,Albania,(1936-06-01),"1 June 1936Sarajevo, Kingdom of Yugoslavia (mo...",1953–1998,"Subotom uvece, Tu ne tueras point, Saša, Pod i...",1936.0


In [448]:
cleaned_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11268 entries, 0 to 11267
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   actor_name       11268 non-null  object
 1   name             11268 non-null  object
 2   country          11268 non-null  object
 3   born_date        8856 non-null   object
 4   born_place       8835 non-null   object
 5   years_of_active  7679 non-null   object
 6   filmography      5829 non-null   object
 7   born_year        8856 non-null   object
dtypes: object(8)
memory usage: 704.4+ KB


In [449]:
cols = list(cleaned_df.columns)
cols.insert(3, cols.pop())
cleaned_df = cleaned_df[cols]
cleaned_df = cleaned_df.drop(labels='born_date', axis=1)

In [450]:
cleaned_df

Unnamed: 0,actor_name,name,country,born_year,born_place,years_of_active,filmography
0,Agim Kaba (1980),Agim Kaba,Albania,1980,"February 16, 1980 (age 45)New York City, U.S.",2001–present,
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,1942,"Kuçovë, Albania",1966–1992,
3,Albert Vërria (1936–2015),Albert Vërria,Albania,1936,"3 September 1936Fier, Kingdom of Albania",,"Trapi i Vjetër, Misioni përtej detit, Në emër ..."
4,Aleko Prodani (1942–2006),Aleko Prodani,Albania,,,,
...,...,...,...,...,...,...,...
11263,Zizi Strallen (born 1990),Zizi Strallen,United Kingdom,1990,"1 November 1990 (age 34)[1]London, England",1995–present,"The Snow Queen, The Ugly Duckling, Merrily We ..."
11264,Zoe Boyle (born 1989),Zoe Boyle,United Kingdom,1989,1 January 1989 (age 36),,"Freeloaders, Promise at Dawn, Rémi sans famill..."
11265,Zoë Lister (born 1982),Zoë Lister,United Kingdom,,,2005–present,
11266,Zoë Wanamaker (born 1949) (born in New York Ci...,Zoë Wanamaker,United Kingdom,1949,"13 May 1949 (age 75)[1][2]New York City, U.S.",1970–present,


In [451]:
def fill_from_name_born_year(row):
    value1 = row['actor_name']
    original_value = row['born_year']
    if original_value is None:
        pattern = r'\(?(\d{4})\)?'
        match = re.search(pattern, value1)
        if match:
            birth_year = match.group(1) if match.group(1) else None
            return birth_year
    else:
        return original_value
        

In [452]:
cleaned_df['born_year'] = cleaned_df.apply(fill_from_name_born_year, axis=1)

In [453]:
cleaned_df

Unnamed: 0,actor_name,name,country,born_year,born_place,years_of_active,filmography
0,Agim Kaba (1980),Agim Kaba,Albania,1980,"February 16, 1980 (age 45)New York City, U.S.",2001–present,
1,Agim Qirjaqi (1950–2010),Agim Qirjaqi,Albania,1950,,,
2,Agim Shuka (1942–1992),Agim Shuka,Albania,1942,"Kuçovë, Albania",1966–1992,
3,Albert Vërria (1936–2015),Albert Vërria,Albania,1936,"3 September 1936Fier, Kingdom of Albania",,"Trapi i Vjetër, Misioni përtej detit, Në emër ..."
4,Aleko Prodani (1942–2006),Aleko Prodani,Albania,1942,,,
...,...,...,...,...,...,...,...
11263,Zizi Strallen (born 1990),Zizi Strallen,United Kingdom,1990,"1 November 1990 (age 34)[1]London, England",1995–present,"The Snow Queen, The Ugly Duckling, Merrily We ..."
11264,Zoe Boyle (born 1989),Zoe Boyle,United Kingdom,1989,1 January 1989 (age 36),,"Freeloaders, Promise at Dawn, Rémi sans famill..."
11265,Zoë Lister (born 1982),Zoë Lister,United Kingdom,1982,,2005–present,
11266,Zoë Wanamaker (born 1949) (born in New York Ci...,Zoë Wanamaker,United Kingdom,1949,"13 May 1949 (age 75)[1][2]New York City, U.S.",1970–present,


In [454]:
cleaned_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11268 entries, 0 to 11267
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   actor_name       11268 non-null  object
 1   name             11268 non-null  object
 2   country          11268 non-null  object
 3   born_year        9560 non-null   object
 4   born_place       8835 non-null   object
 5   years_of_active  7679 non-null   object
 6   filmography      5829 non-null   object
dtypes: object(7)
memory usage: 616.3+ KB


In [455]:
cleaned_df = cleaned_df.drop(labels=['actor_name'], axis=1)

In [456]:
cleaned_df

Unnamed: 0,name,country,born_year,born_place,years_of_active,filmography
0,Agim Kaba,Albania,1980,"February 16, 1980 (age 45)New York City, U.S.",2001–present,
1,Agim Qirjaqi,Albania,1950,,,
2,Agim Shuka,Albania,1942,"Kuçovë, Albania",1966–1992,
3,Albert Vërria,Albania,1936,"3 September 1936Fier, Kingdom of Albania",,"Trapi i Vjetër, Misioni përtej detit, Në emër ..."
4,Aleko Prodani,Albania,1942,,,
...,...,...,...,...,...,...
11263,Zizi Strallen,United Kingdom,1990,"1 November 1990 (age 34)[1]London, England",1995–present,"The Snow Queen, The Ugly Duckling, Merrily We ..."
11264,Zoe Boyle,United Kingdom,1989,1 January 1989 (age 36),,"Freeloaders, Promise at Dawn, Rémi sans famill..."
11265,Zoë Lister,United Kingdom,1982,,2005–present,
11266,Zoë Wanamaker,United Kingdom,1949,"13 May 1949 (age 75)[1][2]New York City, U.S.",1970–present,


### born place

In [457]:
cleaned_df['born_place'].unique()

array(['February 16, 1980 (age\xa045)New York City, U.S.', None,
       'Kuçovë, Albania', ...,
       '1 November 1990 (age\xa034)[1]London, England',
       '1 January 1989 (age\xa036)',
       '13 May 1949 (age\xa075)[1][2]New York City, U.S.'], dtype=object)

In [458]:
def clean_born_place(value):
    pattern = r'([A-Za-z\s]+(?:[A-Za-z\s]+)?)\s*,\s*([A-Za-z\s]+(?:[A-Za-z\s]+)?)'
    if value is None:
        return None
    else:
        match = re.search(pattern, value)
        if match:
            return match.group(1).strip() + ', ' + match.group(2).strip() if match.group(1) and match.group(2) else value

In [459]:
cleaned_df['born_place'] = cleaned_df['born_place'].apply(clean_born_place)

In [460]:
cleaned_df

Unnamed: 0,name,country,born_year,born_place,years_of_active,filmography
0,Agim Kaba,Albania,1980,"New York City, U",2001–present,
1,Agim Qirjaqi,Albania,1950,,,
2,Agim Shuka,Albania,1942,,1966–1992,
3,Albert Vërria,Albania,1936,"Fier, Kingdom of Albania",,"Trapi i Vjetër, Misioni përtej detit, Në emër ..."
4,Aleko Prodani,Albania,1942,,,
...,...,...,...,...,...,...
11263,Zizi Strallen,United Kingdom,1990,"London, England",1995–present,"The Snow Queen, The Ugly Duckling, Merrily We ..."
11264,Zoe Boyle,United Kingdom,1989,,,"Freeloaders, Promise at Dawn, Rémi sans famill..."
11265,Zoë Lister,United Kingdom,1982,,2005–present,
11266,Zoë Wanamaker,United Kingdom,1949,"New York City, U",1970–present,


In [461]:
cleaned_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11268 entries, 0 to 11267
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   name             11268 non-null  object
 1   country          11268 non-null  object
 2   born_year        9560 non-null   object
 3   born_place       8289 non-null   object
 4   years_of_active  7679 non-null   object
 5   filmography      5829 non-null   object
dtypes: object(6)
memory usage: 528.3+ KB


In [462]:
finish_df = cleaned_df.dropna()
finish_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3918 entries, 9 to 11263
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   name             3918 non-null   object
 1   country          3918 non-null   object
 2   born_year        3918 non-null   object
 3   born_place       3918 non-null   object
 4   years_of_active  3918 non-null   object
 5   filmography      3918 non-null   object
dtypes: object(6)
memory usage: 214.3+ KB


In [463]:
finish_df

Unnamed: 0,name,country,born_year,born_place,years_of_active,filmography
9,Bekim Fehmiu,Albania,1936,"Sarajevo, Kingdom of Yugoslavia",1953–1998,"Subotom uvece, Tu ne tueras point, Saša, Pod i..."
16,Erbi Ago,Albania,1990,"Tirana, Albania",2008–present,"Messengers 2: The Scarecrow, Ghost Town, Light..."
24,James Belushi,Albania,1954,"Wheaton, Illinois",1977–present,"The Fury, The Cleansing, Thief, Sugar or Plain..."
25,James Biberi,Albania,1965,"Gjakova, Kosovo",1993–present,"Amongst Friends, Clouds of Magellan, Made Men,..."
26,John Belushi,Albania,1949,"Chicago, Illinois",1972–1982,"Tarzoon: Shame of the Jungle, Animal House, De..."
...,...,...,...,...,...,...
11249,William Roache,United Kingdom,1932,"Basford, Nottingham",1958–present,"Behind the Mask, Coronation Street, The Bulldo..."
11250,William Russell,United Kingdom,1924,"Sunderland, County Durham",1949–2022,"Gift Horse a.k.a. Glory at Sea, Appointment in..."
11252,Wilson Mbomio,United Kingdom,2002,"Hackney, London",2015–present,"Pan, The Favourite, We the Kings, Kindling"
11258,Zack Morris,United Kingdom,1998,"Brentwood, Essex",2007–present,"Autopilot, The Fades, One Night, EastEnders, C..."


In [464]:
finish_df.to_csv('actors.csv', index=False)