In [None]:
import wrds

# Connect to WRDS. The resulting `db` handle is reused by the query cell
# below (via `db.raw_sql`) and is closed in the last code cell.
db = wrds.Connection()

## Actual Script To Pull Transcripts

In [None]:
# Tickers to pull transcripts for. Candidates: INTC, AMD, NVDA, TSM, TXN
# NOTE(review): the company lookup filters on the NASDAQ exchange id, so a
# ticker listed on another exchange (TSM, presumably NYSE - confirm) would be
# reported as "No company ID found".
tickers = ['INTC']

# Calendar years to query, both bounds inclusive (2010 through 2024).
FIRST_YEAR, LAST_YEAR = 2010, 2024
years = range(FIRST_YEAR, LAST_YEAR + 1)

In [None]:
# Capital IQ exchange id used when resolving a ticker (458 = NASDAQ).
NASDAQ_EXCHANGE_ID = 458
# Key-development event type id that limits results to earnings calls.
EARNINGS_CALL_EVENT_TYPE_ID = 48


def find_company_ids(db, ticker, exchange_id=NASDAQ_EXCHANGE_ID):
    """Return the Capital IQ company IDs matching a ticker on one exchange.

    Parameters
    ----------
    db : object
        Open WRDS connection; only its ``raw_sql(sql)`` method is used.
    ticker : str
        Symbol compared against ``symbolvalue`` in ``ciq.wrds_ciqsymbol``.
    exchange_id : int, default 458
        Value compared against ``exchangeid``.

    Returns
    -------
    list of int
        Distinct matching company IDs in ascending order (so the first
        element is stable between runs); empty when nothing matches.
    """
    # NOTE(review): `ticker` is interpolated directly into the SQL text. That
    # is acceptable for the hard-coded list in this notebook, but it must not
    # be fed with untrusted input.
    query = f"""
        select distinct companyID
        from ciq.wrds_ciqsymbol
        where symbolvalue = '{ticker}'
        and exchangeid = {int(exchange_id)}
        order by companyID;
    """
    companyid_df = db.raw_sql(query)
    if companyid_df.empty:
        return []
    return companyid_df['companyid'].astype(int).tolist()


def fetch_earnings_calls(db, company_id, year,
                         event_type_id=EARNINGS_CALL_EVENT_TYPE_ID):
    """Fetch transcript components for one company and calendar year.

    The CTE keeps, for every distinct combination of the grouped detail
    columns, only the highest ``transcriptid``. Those transcripts are then
    joined to their speaker rows and component text, and filtered to
    ``event_type_id``.

    Parameters
    ----------
    db : object
        Open WRDS connection; only its ``raw_sql(sql)`` method is used.
    company_id : int
        Capital IQ company id to filter ``ciq.wrds_transcript_detail`` on.
    year : int
        Year compared against ``date_part('year', mostimportantdateutc)``.
    event_type_id : int, default 48
        Value compared against ``keydeveventtypeid``.

    Returns
    -------
    DataFrame
        Whatever ``db.raw_sql`` returns for the query: one row per transcript
        component, ordered by transcript id and component order.
    """
    # int() keeps the interpolated values strictly numeric.
    sql_query = f"""
        WITH latest_transcripts AS (
            SELECT companyid
                , headline
                , mostimportantdateutc
                , keydeveventtypeid
                , keydeveventtypename
                , companyname
                , audiolengthsec
                , MAX(transcriptid) AS transcriptid  -- highest transcriptid per group
            FROM ciq.wrds_transcript_detail
            WHERE companyid = {int(company_id)}
                AND date_part('year', mostimportantdateutc) = {int(year)}
            GROUP BY companyid, headline, mostimportantdateutc, keydeveventtypeid, keydeveventtypename, companyname, audiolengthsec
        )
        SELECT a.companyid
            , a.headline
            , a.mostimportantdateutc
            , a.keydeveventtypeid
            , a.keydeveventtypename
            , a.companyname
            , a.audiolengthsec
            , b.transcriptid
            , b.transcriptcomponentid
            , b.transcriptcomponenttypename
            , b.transcriptpersonname
            , b.companyofperson
            , b.speakertypename
            , c.componentorder
            , c.componenttext
            , b.word_count
        FROM latest_transcripts AS a
        JOIN ciq.wrds_transcript_person AS b
            ON a.transcriptid = b.transcriptid
        JOIN ciq.ciqtranscriptcomponent AS c
            ON b.transcriptcomponentid = c.transcriptcomponentid
        WHERE a.keydeveventtypeid = {int(event_type_id)}  -- limits to earnings calls
        ORDER BY a.transcriptid, b.componentorder;
    """
    return db.raw_sql(sql_query)


# Pull every (ticker, year) combination and write one CSV per non-empty result.
for ticker in tickers:

    company_ids = find_company_ids(db, ticker)

    # Nothing listed under this symbol on the exchange: skip the ticker.
    if not company_ids:
        print(f"No company ID found for {ticker}")
        continue

    # A symbol can resolve to several companies; make the choice visible
    # instead of silently taking an arbitrary row.
    if len(company_ids) > 1:
        print(f"Multiple company IDs found for {ticker}: {company_ids}; using {company_ids[0]}")

    company_id = company_ids[0]

    for year in years:

        transcript_df = fetch_earnings_calls(db, company_id, year)

        if transcript_df.empty:
            print(f"No earnings call data for {ticker} in {year}")
            continue

        # One file per ticker and year, written to the working directory.
        csv_filename = f"{ticker}_Earnings_Calls_{year}.csv"
        transcript_df.to_csv(csv_filename, index=False)
        print(f"Saved {csv_filename}")

In [None]:
# Close the WRDS connection opened in the first code cell.
db.close()

## Things to do

### 7. Sentiment Analysis - Biagio

- **Use case:** Quantify the sentiment of the text (positive, neutral, or negative). Sentiment can also be analyzed by section (e.g., executive vs. operator vs. Q&A) to assess the tone of different parts of the earnings calls.
- **Implementation:** Use a pre-trained sentiment analysis model, or create a custom model, to calculate a sentiment score for each transcript. Then analyze how sentiment changes for each company over time and how it differs between companies.



### 10. Jaccard Similarity - Biagio

- **Use case:** Compare the overlap of words or phrases between two transcripts. Jaccard similarity is the size of the intersection of two word sets divided by the size of their union.
- **Implementation:** Use Jaccard similarity to compare transcripts from different companies and measure how much their vocabulary overlaps, which could reveal whether the companies are discussing similar or divergent topics.



### 11. Dependency Parsing - Biagio

- **Use case:** Understand the grammatical structure of the text and analyze relationships between words. This helps capture how executives form arguments or express uncertainty.
- **Implementation:** Use dependency parsing to identify the syntactic relationships in the transcripts (e.g., subject-verb-object relationships). This can be particularly insightful for understanding how executives convey their message (e.g., confident statements vs. hedged language).