In [2]:
pip install tqdm


Collecting tqdmNote: you may need to restart the kernel to use updated packages.

  Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 0.0/57.6 kB ? eta -:--:--
     ------- -------------------------------- 10.2/57.6 kB ? eta -:--:--
     ------------- ------------------------ 20.5/57.6 kB 162.5 kB/s eta 0:00:01
     ------------- ------------------------ 20.5/57.6 kB 162.5 kB/s eta 0:00:01
     --------------------------------- ---- 51.2/57.6 kB 238.1 kB/s eta 0:00:01
     -------------------------------------- 57.6/57.6 kB 251.7 kB/s eta 0:00:00
Downloading tqdm-4.66.4-py3-none-any.whl (78 kB)
   ---------------------------------------- 0.0/78.3 kB ? eta -:--:--
   --------------- ------------------------ 30.7/78.3 kB ? eta -:--:--
   -------------------- ------------------- 41.0/78.3 kB 653.6 kB/s eta 0:00:01
   ---------------------------------------- 78.3/78.3 kB 618.5 kB/s eta 0:00:00
Installing collected packages: tqdm
Success

In [3]:
import pandas as pd
import imdb
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm  # Progress bar

# Initialize IMDbPY
moviesDB = imdb.IMDb()

# Read movie names from Excel file
movie_df = pd.read_excel('Movie Names.xlsx')
movie_names = movie_df['Movie Title'].tolist()  # Adjust column name if necessary

# Lists to store movie details
movie_details = []

# Function to get movie details with retry mechanism
def get_movie_details(movie_name, retries=3):
    for attempt in range(retries):
        try:
            movies = moviesDB.search_movie(movie_name)
            if movies:
                first_movie = movies[0]
                movie_id = first_movie.movieID
                movie = moviesDB.get_movie(movie_id)
                
                title = movie.get('title', 'N/A')
                year = movie.get('year', 'N/A')
                rating = movie.get('rating', 'N/A')
                genres = ', '.join(movie.get('genres', []))
                cast = ', '.join([person['name'] for person in movie.get('cast', [])[:5]])  # Get top 5 cast members
                
                return title, year, rating, genres, cast
        except Exception as e:
            print(f"Error retrieving details for {movie_name} (attempt {attempt+1}/{retries}): {e}")
            time.sleep(2)  # Wait before retrying
    return 'N/A', 'N/A', 'N/A', 'N/A', 'N/A'

# Using ThreadPoolExecutor for parallel processing with progress bar
with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers based on your system
    future_to_movie = {executor.submit(get_movie_details, name): name for name in movie_names}
    for future in tqdm(as_completed(future_to_movie), total=len(future_to_movie), desc="Fetching movie details"):
        movie_details.append(future.result())

# Unpack movie details
movie_titles, movie_years, movie_ratings, movie_genres, movie_casts = zip(*movie_details)

# Create a DataFrame
result_df = pd.DataFrame({
    'Title': movie_titles,
    'Year': movie_years,
    'Rating': movie_ratings,
    'Genres': movie_genres,
    'Cast': movie_casts
})

# Print or save the DataFrame
print(result_df)
result_df.to_excel('movies_info.xlsx', index=False)  # Save to Excel file


Fetching movie details:  60%|█████████████████████████████████▍                      | 485/811 [08:44<06:19,  1.17s/it]2024-06-25 21:36:22,320 CRITICAL [imdbpy] C:\Users\eklav\AppData\Local\Programs\Python\Python311\Lib\site-packages\imdb\_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/find/?q=D&s=tt', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 500: 'Internal Server Error'>},); kwds: {}
Traceback (most recent call last):
  File "C:\Users\eklav\AppData\Local\Programs\Python\Python311\Lib\site-packages\imdb\parser\http\__init__.py", line 233, in retrieve_unicode
    response = uopener.open(url)
               ^^^^^^^^^^^^^^^^^
  File "C:\Users\eklav\AppData\Local\Programs\Python\Python311\Lib\urllib\request.py", line 525, in open
    response = meth(req, response)
               ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\eklav\AppData\Local\Programs\Python\Python311\Lib\urllib\reque

Error retrieving details for D (attempt 1/3): {'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/find/?q=D&s=tt', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 500: 'Internal Server Error'>}


Fetching movie details: 100%|████████████████████████████████████████████████████████| 811/811 [13:50<00:00,  1.02s/it]


                   Title  Year Rating                            Genres  \
0      Bajrangi Bhaijaan  2015    8.1  Action, Adventure, Comedy, Drama   
1        Rang De Basanti  2006    8.1              Comedy, Crime, Drama   
2         Chak De! India  2007    8.1              Drama, Family, Sport   
3        My Name Is Khan  2010    7.9         Adventure, Drama, Romance   
4    Like Stars on Earth  2007    8.3                     Drama, Family   
..                   ...   ...    ...                               ...   
806     Parde Ke Peechey  1971    5.9                     Action, Crime   
807              Dhamkee  1973    5.6     Action, Crime, Drama, Musical   
808    Ladki Sahyadri Ki  1966    5.0                    Musical, Drama   
809            Madadgaar  1987    5.3     Action, Crime, Drama, Romance   
810         Chhoo Mantar  1956    6.0                            Comedy   

                                                  Cast  
0    Salman Khan, Harshaali Malhotra, Nawa

In [4]:
movie_df

Unnamed: 0,Movie Title
0,3 Idiots
1,Taare Zameen Par
2,PK
3,Dangal
4,Rang De Basanti
...,...
806,Parde Ke Peechey
807,Dhamkee
808,Chhoo Mantar
809,Ladki Sahyadri Ki


In [5]:
result_df

Unnamed: 0,Title,Year,Rating,Genres,Cast
0,Bajrangi Bhaijaan,2015,8.1,"Action, Adventure, Comedy, Drama","Salman Khan, Harshaali Malhotra, Nawazuddin Si..."
1,Rang De Basanti,2006,8.1,"Comedy, Crime, Drama","Aamir Khan, Siddharth, Sharman Joshi, Kunal Ka..."
2,Chak De! India,2007,8.1,"Drama, Family, Sport","Shah Rukh Khan, Vidya Malvade, Sagarika Ghatge..."
3,My Name Is Khan,2010,7.9,"Adventure, Drama, Romance","Shah Rukh Khan, Kajol, Katie Amanda Keane, Ken..."
4,Like Stars on Earth,2007,8.3,"Drama, Family","Darsheel Safary, Aamir Khan, Tisca Chopra, Vip..."
...,...,...,...,...,...
806,Parde Ke Peechey,1971,5.9,"Action, Crime","Vinod Mehra, Yogeeta Bali, Pran Sikand, Bindu ..."
807,Dhamkee,1973,5.6,"Action, Crime, Drama, Musical","Vinod Khanna, Kumkum, Mehmood Jr., Ramesh Deo,..."
808,Ladki Sahyadri Ki,1966,5.0,"Musical, Drama","Shalini Abhyankar, Keshavrao Date, Vatsala Des..."
809,Madadgaar,1987,5.3,"Action, Crime, Drama, Romance","Jeetendra, Sulakshana Pandit, Aruna Irani, Mad..."
