<a href="https://colab.research.google.com/github/Vivisteria11/AI-chat-bot/blob/main/DAVE_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers requests pandas torch huggingface_hub

import requests
import pandas as pd
import json
from typing import List, Dict
import time
from datetime import datetime
from huggingface_hub import HfApi, InferenceApi
from transformers import pipeline

class MovieAnalyzer:
    """Analyze a small sample movie dataset with a local FLAN-T5 pipeline.

    For every movie in ``self.df`` the analyzer generates a rewritten
    description, a genre prediction and a plot twist, and writes the
    accumulated results to a JSON file after each movie.
    """

    def __init__(self):
        """Load the text2text-generation pipeline and build the sample data."""
        # Function-scope import so this class does not depend on a
        # file-level ``import os``.
        import os

        # Credentials must never be hard-coded in source. Create a token at
        # https://huggingface.co/settings/tokens and export it as HF_TOKEN.
        # Nothing in this class reads the attribute afterwards, so an unset
        # variable (empty string) does not affect the local pipeline.
        self.HF_TOKEN = os.environ.get("HF_TOKEN", "")

        # Initialize the generator using FLAN-T5-Large (free to use)
        self.generator = pipeline(
            "text2text-generation",
            model="google/flan-t5-large",
            max_length=200,
            device="cpu"  # Use "cuda" if you have GPU
        )

        # Sample movie dataset
        self.data = {
            'movie_title': [
                'Inception',
                'The Shawshank Redemption',
                'The Dark Knight'
            ],
            'genres': [
                'Action, Sci-Fi',
                'Drama',
                'Action, Crime'
            ],
            'description': [
                'Cobb and his team share the skill of entering one\'s dreams.',
                'Two imprisoned men bond over a number of years.',
                'Batman, Gordon, and Harvey Dent are forced to deal with the chaos unleashed by an anarchist mastermind.'
            ],
            'cast': [
                'Leonardo DiCaprio, Ellen Page',
                'Tim Robbins, Morgan Freeman',
                'Christian Bale, Heath Ledger'
            ]
        }
        self.df = pd.DataFrame(self.data)

    def generate_text(self, prompt: str, max_retries: int = 3) -> str:
        """Generate text for ``prompt`` using FLAN-T5.

        Args:
            prompt: Instruction text passed to the pipeline.
            max_retries: Total number of attempts. Values below 1 are
                treated as a single attempt so a string is always returned.

        Returns:
            The generated text, or ``"Error generating response: ..."`` if
            every attempt raised.
        """
        attempts = max(1, max_retries)
        last_error = None
        for attempt in range(attempts):
            if attempt:
                # Pause between attempts only, never after the final one.
                time.sleep(2)
            try:
                result = self.generator(
                    prompt,
                    max_length=200,
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.9
                )
                return result[0]['generated_text']
            except Exception as e:
                # Best-effort boundary: remember the failure and retry.
                last_error = e
        return f"Error generating response: {str(last_error)}"

    def generate_creative_description(self, title: str, description: str) -> str:
        """Ask the model for a more engaging description of the movie."""
        prompt = f"""Task: Create an engaging movie description
        Movie: {title}
        Original: {description}
        Write a compelling and detailed description that captures the essence of the movie.
        """
        return self.generate_text(prompt)

    def predict_genres(self, description: str) -> List[str]:
        """Predict up to three genres from a movie description.

        The model response is split on commas; blank entries (from stray or
        trailing commas) are dropped before the three-genre cap is applied.
        """
        prompt = f"""Task: Predict movie genres
        Description: {description}
        List the most appropriate genres for this movie (maximum 3 genres).
        """
        response = self.generate_text(prompt)
        # Clean up the response and split into genres
        genres = [g.strip() for g in response.split(',')]
        genres = [g for g in genres if g]
        return genres[:3]  # Ensure maximum 3 genres

    def generate_plot_twist(self, title: str, description: str) -> str:
        """Ask the model for an alternative ending or plot twist."""
        prompt = f"""Task: Create a movie plot twist
        Movie: {title}
        Original plot: {description}
        Create a surprising but logical alternative ending or plot twist.
        """
        return self.generate_text(prompt)

    def analyze_movies(self) -> List[Dict]:
        """Process all movies and generate analysis.

        Returns:
            One dict per row of ``self.df`` with the original fields, the
            generated fields and an ISO-format timestamp.
        """
        outputs = []

        for _, row in self.df.iterrows():
            print(f"Processing: {row['movie_title']}")

            analysis = {
                'movie_title': row['movie_title'],
                'original_description': row['description'],
                'generated_description': self.generate_creative_description(
                    row['movie_title'], row['description']
                ),
                'original_genres': row['genres'],
                'predicted_genres': self.predict_genres(row['description']),
                'plot_twist': self.generate_plot_twist(
                    row['movie_title'], row['description']
                ),
                'cast': row['cast'],
                'analysis_timestamp': datetime.now().isoformat()
            }
            outputs.append(analysis)

            # Save progress: the file is rewritten with everything so far.
            self.save_outputs(outputs)

            # Add delay to prevent overloading
            time.sleep(2)

        return outputs

    def save_outputs(self, outputs: List[Dict], filename: str = 'movie_analysis_outputs.json'):
        """Save outputs to a UTF-8 JSON file, overwriting any existing file."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(outputs, f, indent=4, ensure_ascii=False)

    def display_outputs(self, outputs: List[Dict]):
        """Print a formatted summary of each analysis dict."""
        for output in outputs:
            print("\n" + "="*50)
            print(f"Movie: {output['movie_title']}")
            print("-"*50)
            print(f"Generated Description:\n{output['generated_description']}\n")
            print(f"Original Genres: {output['original_genres']}")
            print(f"Predicted Genres: {', '.join(output['predicted_genres'])}\n")
            print(f"Plot Twist:\n{output['plot_twist']}\n")

# Alternative method using Hugging Face's inference API
class MovieAnalyzerInferenceAPI:
    """Generate text with FLAN-T5 through the hosted Hugging Face Inference API.

    This class only provides ``generate_text``; it does not implement
    ``analyze_movies`` or ``display_outputs``.
    """

    def __init__(self, hf_token):
        """Store the API token used for the ``Authorization`` header.

        Args:
            hf_token: Hugging Face access token (sent as a Bearer token).
        """
        self.api = HfApi()
        self.token = hf_token

    def generate_text(self, prompt: str, timeout: float = 60.0) -> str:
        """POST ``prompt`` to the Inference API and return the generated text.

        Args:
            prompt: Text sent as the ``inputs`` field of the JSON payload.
            timeout: Seconds to wait for the HTTP request before giving up.
                Without it a stalled connection would block indefinitely.

        Returns:
            The ``generated_text`` of the first result on HTTP 200,
            ``"Error: <status code>"`` for any other status, or
            ``"Error: <exception>"`` if the request or response parsing
            raised (including a timeout).
        """
        # Using FLAN-T5 through the Inference API
        api_url = "https://api-inference.huggingface.co/models/google/flan-t5-large"
        headers = {"Authorization": f"Bearer {self.token}"}

        try:
            response = requests.post(
                api_url,
                headers=headers,
                json={"inputs": prompt},
                timeout=timeout
            )

            if response.status_code != 200:
                return f"Error: {response.status_code}"
            return response.json()[0]['generated_text']

        except Exception as e:
            # Best-effort: report network/parsing failures as a string so
            # callers always receive text.
            return f"Error: {str(e)}"

# Example usage
def main():
    """Run the sample analysis end to end and print the results."""
    # Flip to True to go through the hosted Inference API instead of the
    # local pipeline.
    # NOTE(review): MovieAnalyzerInferenceAPI, as defined in this file, has no
    # analyze_movies/display_outputs methods, so the True branch would fail
    # with AttributeError below - confirm before enabling.
    use_inference_api = False

    analyzer = (
        MovieAnalyzerInferenceAPI("your-huggingface-token")  # replace with your token
        if use_inference_api
        else MovieAnalyzer()
    )

    results = analyzer.analyze_movies()
    analyzer.display_outputs(results)
    print("\nAnalysis complete! Results saved to 'movie_analysis_outputs.json'")


# Only run the demo when executed as a script / notebook cell.
if __name__ == "__main__":
    main()



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]



Processing: Inception
Processing: The Shawshank Redemption
Processing: The Dark Knight

Movie: Inception
--------------------------------------------------
Generated Description:
This American remake of the 1970 classic story introduces the world to a new cast of characters including actors and directors that have a long history as a group. The cast features Kenneth Branagh, Robert De Niro, John Cusack, Tom Wilkinson, Adrien Brody, and Chris Cooper as main characters and features a stellar cast of supporting characters including Christopher Ryan and Ellen Barkin, David Strathairn. The film uses the same music and the same actors as the original, and with the addition of new characters. The opening credits re-tell the story of the American civil rights movement. The opening sequence is a montage of images of a woman being harassed by police. The first scene in the film shows a policeman chase a man through a wooded area. In this scene officers believe the man is a gang member and are ca

In [3]:
# Install required libraries
!pip install transformers requests pandas torch huggingface_hub

import requests
import pandas as pd
import json
from typing import List, Dict
import time
from datetime import datetime
from huggingface_hub import HfApi, InferenceApi
from transformers import pipeline
import torch

# Report whether CUDA is usable and pick the pipeline device accordingly:
# "cpu" without a GPU, otherwise CUDA device index 0.
print("GPU is available:", torch.cuda.is_available())
if not torch.cuda.is_available():
    print("No GPU available, using CPU")
    device = "cpu"
else:
    print("GPU being used:", torch.cuda.get_device_name(0))
    device = 0  # first CUDA device

class MovieAnalyzer:
    """Analyze a 20-movie sample dataset with a FLAN-T5 pipeline.

    For every movie in ``self.df`` the analyzer generates a rewritten
    description, a genre prediction and a plot twist. Movies are processed
    in batches and the accumulated results are written to JSON as the run
    progresses.
    """

    def __init__(self):
        """Load the text2text-generation pipeline and build the dataset."""
        # Function-scope import so this class does not depend on a
        # file-level ``import os``.
        import os

        # Credentials must never be hard-coded in source. Create a token at
        # https://huggingface.co/settings/tokens and export it as HF_TOKEN.
        # Nothing in this class reads the attribute afterwards, so an unset
        # variable (empty string) does not affect the local pipeline.
        self.HF_TOKEN = os.environ.get("HF_TOKEN", "")

        # Initialize with GPU support
        self.generator = pipeline(
            "text2text-generation",
            model="google/flan-t5-large",
            max_length=200,
            device=device  # This will use GPU if available
        )

        # 20 movies dataset
        self.data = {
            'movie_title': [
                'Inception',
                'The Shawshank Redemption',
                'The Dark Knight',
                'Pulp Fiction',
                'The Matrix',
                'Forrest Gump',
                'The Godfather',
                'Jurassic Park',
                'Titanic',
                'The Silence of the Lambs',
                'Avatar',
                'The Lord of the Rings: The Fellowship of the Ring',
                'Goodfellas',
                'Fight Club',
                'The Green Mile',
                'Interstellar',
                'The Avengers',
                'Schindler\'s List',
                'The Lion King',
                'Star Wars: Episode IV - A New Hope'
            ],
            'genres': [
                'Action, Sci-Fi',
                'Drama',
                'Action, Crime',
                'Crime, Drama',
                'Sci-Fi, Action',
                'Drama, Romance',
                'Crime, Drama',
                'Adventure, Sci-Fi',
                'Drama, Romance',
                'Crime, Thriller',
                'Sci-Fi, Adventure',
                'Fantasy, Adventure',
                'Crime, Biography',
                'Drama, Thriller',
                'Crime, Drama, Fantasy',
                'Sci-Fi, Adventure',
                'Action, Sci-Fi',
                'Biography, Drama',
                'Animation, Adventure',
                'Sci-Fi, Adventure'
            ],
            'description': [
                'A skilled thief who enters dreams to steal secrets takes on an impossible task: planting an idea in someone\'s mind.',
                'Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.',
                'When the Joker wreaks havoc on Gotham City, Batman must confront one of his most notorious enemies.',
                'Various interconnected stories of Los Angeles criminals, small-time mobsters, and other colorful characters.',
                'A computer programmer discovers humanity is trapped inside a simulated reality and joins a rebellion to break free.',
                'The life journey of a slow-witted but kind-hearted man as he witnesses and influences several historical events.',
                'The aging patriarch of an organized crime dynasty transfers control to his reluctant son.',
                'A theme park showcasing genetically recreated dinosaurs turns into a nightmare when the creatures escape.',
                'A seventeen-year-old aristocrat falls in love with a kind but poor artist aboard the luxurious, ill-fated R.M.S. Titanic.',
                'A young FBI cadet must receive the help of an incarcerated and manipulative cannibal killer.',
                'A paraplegic marine dispatched to the moon Pandora becomes torn between following orders and protecting the world he feels is his home.',
                'A young hobbit and his friends embark on a quest to destroy a powerful ring and defeat the dark lord who created it.',
                'The story of Henry Hill and his life in the mob, covering his relationship with his wife Karen and his mob partners.',
                'An insomniac office worker and a devil-may-care soapmaker form an underground fight club.',
                'A tale of a gentle prison guard who discovers that a death row inmate possesses mysterious healing powers.',
                'A team of explorers travel through a wormhole in space in an attempt to ensure humanity\'s survival.',
                'Earth\'s mightiest heroes must come together and learn to fight as a team to stop an alien invasion.',
                'In German-occupied Poland during World War II, businessman Oskar Schindler gradually becomes concerned for his Jewish workforce.',
                'A young lion prince must overcome tragedy and take his rightful place as king of the Pride Lands.',
                'Luke Skywalker joins forces with a Jedi Knight, a cocky pilot, and others to save Princess Leia and battle the Empire\'s world-destroying battle station.'
            ],
            'cast': [
                'Leonardo DiCaprio, Ellen Page',
                'Tim Robbins, Morgan Freeman',
                'Christian Bale, Heath Ledger',
                'John Travolta, Samuel L. Jackson',
                'Keanu Reeves, Laurence Fishburne',
                'Tom Hanks, Robin Wright',
                'Marlon Brando, Al Pacino',
                'Sam Neill, Laura Dern',
                'Leonardo DiCaprio, Kate Winslet',
                'Jodie Foster, Anthony Hopkins',
                'Sam Worthington, Zoe Saldana',
                'Elijah Wood, Ian McKellen',
                'Robert De Niro, Ray Liotta',
                'Brad Pitt, Edward Norton',
                'Tom Hanks, Michael Clarke Duncan',
                'Matthew McConaughey, Anne Hathaway',
                'Robert Downey Jr., Chris Evans',
                'Liam Neeson, Ben Kingsley',
                'Matthew Broderick, James Earl Jones',
                'Mark Hamill, Harrison Ford'
            ]
        }
        self.df = pd.DataFrame(self.data)

    def generate_text(self, prompt: str, max_retries: int = 3) -> str:
        """Generate text for ``prompt`` using FLAN-T5.

        Args:
            prompt: Instruction text passed to the pipeline.
            max_retries: Total number of attempts. Values below 1 are
                treated as a single attempt so a string is always returned.

        Returns:
            The generated text, or ``"Error generating response: ..."`` if
            every attempt raised.
        """
        attempts = max(1, max_retries)
        last_error = None
        for attempt in range(attempts):
            if attempt:
                # Pause between attempts only, never after the final one.
                time.sleep(2)
            try:
                result = self.generator(
                    prompt,
                    max_length=250,
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.9
                )
                return result[0]['generated_text']
            except Exception as e:
                # Best-effort boundary: remember the failure and retry.
                last_error = e
        return f"Error generating response: {str(last_error)}"

    def generate_creative_description(self, title: str, description: str) -> str:
        """Ask the model for a more engaging description of the movie."""
        prompt = f"""Task: Create an engaging movie description
        Movie: {title}
        Original: {description}
        Write a compelling and detailed description that captures the essence of the movie."""
        return self.generate_text(prompt)

    def predict_genres(self, description: str) -> List[str]:
        """Predict up to three genres from a movie description.

        The model response is split on commas; blank entries are dropped and
        the result is capped at three genres, matching the limit stated in
        the prompt.
        """
        prompt = f"""Task: Predict movie genres
        Description: {description}
        List the most appropriate genres for this movie (maximum 3 genres)."""
        response = self.generate_text(prompt)
        genres = [g.strip() for g in response.split(',')]
        return [g for g in genres if g][:3]

    def generate_plot_twist(self, title: str, description: str) -> str:
        """Ask the model for an alternative ending or plot twist."""
        prompt = f"""Task: Create a movie plot twist
        Movie: {title}
        Original plot: {description}
        Create a surprising but logical alternative ending or plot twist."""
        return self.generate_text(prompt)

    @staticmethod
    def _total_batches(start_index: int, total_rows: int, batch_size: int) -> int:
        """Return the number of the last batch that will be processed (0 if none)."""
        starts = range(start_index, total_rows, batch_size)
        # Batch numbers are derived from the row offset, so the total is the
        # number of the final batch rather than ``total_rows // batch_size + 1``,
        # which over-counts when the row count divides evenly.
        return starts[-1] // batch_size + 1 if starts else 0

    def analyze_movies(self, start_index: int = 0, batch_size: int = 5) -> List[Dict]:
        """Process movies in batches.

        Args:
            start_index: Row offset in ``self.df`` at which to start.
            batch_size: Number of movies per batch.

        Returns:
            One dict per processed row with the original fields, the
            generated fields and an ISO-format timestamp.

        Raises:
            ValueError: If ``batch_size`` is below 1 or ``start_index`` is
                negative.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        if start_index < 0:
            raise ValueError("start_index must be non-negative")

        all_outputs = []
        total_batches = self._total_batches(start_index, len(self.df), batch_size)

        for i in range(start_index, len(self.df), batch_size):
            batch = self.df.iloc[i:i+batch_size]
            batch_number = i // batch_size + 1
            print(f"\nProcessing batch {batch_number} of {total_batches}")

            for _, row in batch.iterrows():
                print(f"\nProcessing: {row['movie_title']}")

                analysis = {
                    'movie_title': row['movie_title'],
                    'original_description': row['description'],
                    'generated_description': self.generate_creative_description(
                        row['movie_title'], row['description']
                    ),
                    'original_genres': row['genres'],
                    'predicted_genres': self.predict_genres(row['description']),
                    'plot_twist': self.generate_plot_twist(
                        row['movie_title'], row['description']
                    ),
                    'cast': row['cast'],
                    'analysis_timestamp': datetime.now().isoformat()
                }
                all_outputs.append(analysis)

                # Save progress after each movie. The per-batch file is a
                # cumulative checkpoint: it holds every result so far.
                self.save_outputs(all_outputs, f'movie_analysis_outputs_batch_{batch_number}.json')

                # Add delay to prevent overloading
                time.sleep(2)

            print(f"Completed batch {batch_number}")

        # Save final complete output
        self.save_outputs(all_outputs, 'movie_analysis_outputs_complete.json')
        return all_outputs

    def save_outputs(self, outputs: List[Dict], filename: str = 'movie_analysis_outputs.json'):
        """Save outputs to a UTF-8 JSON file, overwriting any existing file."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(outputs, f, indent=4, ensure_ascii=False)

    def display_outputs(self, outputs: List[Dict]):
        """Print a formatted summary of each analysis dict."""
        for output in outputs:
            print("\n" + "="*50)
            print(f"Movie: {output['movie_title']}")
            print("-"*50)
            print(f"Generated Description:\n{output['generated_description']}\n")
            print(f"Original Genres: {output['original_genres']}")
            print(f"Predicted Genres: {', '.join(output['predicted_genres'])}\n")
            print(f"Plot Twist:\n{output['plot_twist']}\n")

def main():
    """Build the analyzer, process every movie in batches and print the results."""
    print("\nInitializing with GPU support...")
    analyzer = MovieAnalyzer()

    # Larger batches when a CUDA device is present, smaller ones on CPU.
    if torch.cuda.is_available():
        batch_size = 8
    else:
        batch_size = 5
    print(f"Using batch size: {batch_size}")

    results = analyzer.analyze_movies(batch_size=batch_size)
    analyzer.display_outputs(results)
    print("\nAnalysis complete! Results saved to 'movie_analysis_outputs_complete.json'")


# Only run the demo when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

GPU is available: True
GPU being used: Tesla T4

Initializing with GPU support...
Using batch size: 8

Processing batch 1 of 3

Processing: Inception

Processing: The Shawshank Redemption

Processing: The Dark Knight

Processing: Pulp Fiction


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



Processing: The Matrix

Processing: Forrest Gump

Processing: The Godfather

Processing: Jurassic Park
Completed batch 1

Processing batch 2 of 3

Processing: Titanic

Processing: The Silence of the Lambs

Processing: Avatar

Processing: The Lord of the Rings: The Fellowship of the Ring

Processing: Goodfellas

Processing: Fight Club

Processing: The Green Mile

Processing: Interstellar
Completed batch 2

Processing batch 3 of 3

Processing: The Avengers

Processing: Schindler's List

Processing: The Lion King

Processing: Star Wars: Episode IV - A New Hope
Completed batch 3

Movie: Inception
--------------------------------------------------
Generated Description:
The film is about a speculative art dealer, Leonardo da Vinci, who travels through a world of dreams. He discovers a secret that he intended to keep hidden and a great idea he will bring back to life.

Original Genres: Action, Sci-Fi
Predicted Genres: Sci-Fi

Plot Twist:
Inception's narrator, Sean, is a teenage girl who dre