In [14]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.tokenize import sent_tokenize
from textblob import TextBlob

# Fetch NLTK's 'punkt' data package. This runs at module level, i.e. every
# time the file is executed or imported.
# NOTE(review): presumably required by the `sent_tokenize` import above, but
# nothing in the visible code calls it - confirm whether this is still needed.
nltk.download('punkt')

# Function to extract article title from URL
def extract_article_title(url, timeout=10):
    """Fetch ``url`` and return the text of its HTML ``<title>`` element.

    Args:
        url: Address of the page to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Without a timeout a single unresponsive server would
            block the caller indefinitely.

    Returns:
        The title text with surrounding whitespace stripped, ``''`` when the
        page has no ``<title>`` element, or ``None`` when the download or the
        parsing fails (the error is printed rather than raised).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
        soup = BeautifulSoup(response.text, 'html.parser')

        # Look the tag up once and reuse it instead of searching twice.
        title_tag = soup.find('title')
        return title_tag.text.strip() if title_tag is not None else ''

    except requests.exceptions.RequestException as e:
        # Network-level problems: connection errors, timeouts, bad status.
        print(f"Error fetching URL: {url}. Exception: {e}")
        return None

    except Exception as e:
        # Anything else (e.g. parsing problems) is reported the same
        # best-effort way so one bad page does not abort a batch run.
        print(f"Error extracting article title from URL: {url}. Exception: {e}")
        return None

# Function to compute text analysis variables
def compute_text_analysis(title):
    """Score the sentiment of ``title`` with TextBlob.

    Returns:
        A ``(polarity, subjectivity)`` tuple read from the blob's sentiment.
        If anything raises while computing it, the error is printed and
        ``(None, None)`` is returned instead.
    """
    try:
        sentiment = TextBlob(title).sentiment
        scores = (sentiment.polarity, sentiment.subjectivity)
    except Exception as err:
        print(f"Error computing text analysis. Exception: {err}")
        scores = (None, None)
    return scores

# Main script
def main():
    """Read URLs from ``Input.xlsx``, score each page title, write ``Output.csv``.

    For every row the ``URL`` column is fetched with ``extract_article_title``
    and the resulting title is scored with ``compute_text_analysis``. Rows for
    which either step returns ``None`` are left out of the output. Any
    exception raised along the way is printed instead of propagated.
    """
    # Fixed column layout so the CSV always carries a header row, even when
    # no URL could be processed (an empty, column-less frame would otherwise
    # produce a file without a header).
    output_columns = [
        'URL_ID',
        'Article_Title',
        'Polarity_Score',
        'Subjectivity_Score',
    ]

    try:
        # Read input file
        input_file = 'Input.xlsx'
        df = pd.read_excel(input_file, header=0)

        output_data = []

        # Walk the two needed columns in lockstep; cheaper than iterrows()
        # and keeps each column's own dtype.
        for url_id, url in zip(df['URL_ID'], df['URL']):
            article_title = extract_article_title(url)
            if article_title is None:
                continue  # fetch/parse failed; the helper already reported it

            polarity_score, subjectivity_score = compute_text_analysis(article_title)
            if polarity_score is None:
                continue  # analysis failed; the helper already reported it

            output_data.append({
                'URL_ID': url_id,
                'Article_Title': article_title,
                'Polarity_Score': polarity_score,
                'Subjectivity_Score': subjectivity_score,
                # Add more variables as needed (and to output_columns above)
            })

        output_df = pd.DataFrame(output_data, columns=output_columns)

        # Save output to CSV or Excel
        output_file = 'Output.csv'
        output_df.to_csv(output_file, index=False)  # Or use .to_excel() for Excel format

        print(f"Output saved to {output_file}")

    except Exception as e:
        print(f"Error in main script. Exception: {e}")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\abine\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Output saved to Output.csv
