In [8]:
import re

# Defining the text data
concert_text = """
JUNE:*****Black Stone Cherry---CAPACITY---:1500 -- $ATTENDANCE: 1,315--GATE:--$28,492 ;
*****Lady Gaga ----CAPACITY---:25,000--- $ATTENDANCE: 24,368---GATE:--$461,956#;
*****Paramore ----CAPACITY---:3000 ---$ATTENDANCE: 3,000 ---GATE:-$150,000;
*****Rage Against the Machine---CAPACITY---:12000 ---$ATTENDANCE: 10,782 ---GATE: --$724,087;
*****BEYONCE---CAPACITY--:20000---$ATTENDANCE: 20,000—-GATE:$2,400,000*****
"""



### Task 1: Extracting Capacity and Attendance
#### Approach
In this task, we aim to extract the capacity and attendance numbers from the text. We will use regular expressions to identify patterns corresponding to capacity and attendance, capturing the numeric values.

#### Execution
Let me create the code and markdown for this task, and then I'll share the file with you.

In [7]:
class ConcertAnalysis:
    def __init__(self, text):
        self.text = text
    
    def task_one(self):
        """
        Extracts the Capacity and Attendance counts from the text.
        
        Parameters:
        None
        
        Returns:
        tuple: Two lists containing the Capacity and Attendance values as integers.
        """
        
        # Extracting Capacity values
        capacity_matches = re.findall(r'CAPACITY---:(\d+,?\d+)', self.text)  # Updated regex to capture commas
        capacities = [int(cap.replace(',', '')) for cap in capacity_matches]  # Removing commas before converting to int
        
        # Extracting Attendance values
        attendance_matches = re.findall(r'ATTENDANCE: (\d+,?\d+)', self.text)
        attendances = [int(att.replace(',', '')) for att in attendance_matches]
        
        return capacities, attendances

# Re-initializing the class
analysis = ConcertAnalysis(concert_text)

# Re-executing task one
capacities, attendances = analysis.task_one()

capacities, attendances

([1500, 25000, 3000, 12000], [1315, 24368, 3000, 10782, 20000])

### Task 2: Extracting Artist Names
#### Approach
In this task, we aim to extract the names of the musical artists from the text. Artist names are prefixed with five asterisks (*****). We will use a regular expression to match the patterns that follow the asterisks and capture the artist names.

In [4]:
class ConcertAnalysis:
    def __init__(self, text):
        self.text = text
    
    def task_two(self):
        """
        Extracts the names of each musical artist from the text.
    
        Parameters:
        None
    
        Returns:
        list: A list containing the names of the musical artists.
        """
    
        # Extracting artist names
        artist_matches = re.findall(r'\*{5}([^*-]+)', self.text)  # Updated regex to stop at dashes or asterisks
        artists = [artist.strip() for artist in artist_matches]
    
        return artists

# Re-initializing the class
analysis = ConcertAnalysis(concert_text)

# Executing task two
artists = analysis.task_two()

artists


['Black Stone Cherry',
 'Lady Gaga',
 'Paramore',
 'Rage Against the Machine',
 'BEYONCE',
 '']

### Task 3: Extracting Gross Ticket Revenue
#### Approach
In this task, our objective is to extract the gross ticket revenue from the text. The revenue information is prefixed with the word "GATE" and is presented in dollar amounts. We will use a regular expression to identify the pattern corresponding to the revenue and capture the numeric values associated with it.

In [5]:
class ConcertAnalysis:
    def __init__(self, text):
        self.text = text
    
    
    def task_three(self):
        """
        Extracts the Gross ticket revenue for each concert from the text.
    
        Parameters:
        None
    
        Returns:
        list: A list containing the gross ticket revenues as floats.
        """
    
        # Extracting gross ticket revenue
        revenue_matches = re.findall(r'GATE:[^$]*\$([\d,]+)', self.text)
        revenues = [float(revenue.replace(',', '')) for revenue in revenue_matches]
        
        return revenues

# Re-initializing the class
analysis = ConcertAnalysis(concert_text)

# Executing task three
revenues = analysis.task_three()

revenues


[28492.0, 461956.0, 150000.0, 724087.0, 2400000.0]

### Task 4: Creating a Nested Dictionary
#### Approach
In this task, we will create a nested dictionary that maps each artist to various calculated values, such as:

The average ticket price for each concert

A Boolean indicating whether or not the artist’s name is comprised of more than just a single word

How full the venue was (i.e., how much of the available capacity was filled according to the attendance)

In [6]:
class ConcertAnalysis:
    def __init__(self, text):
        self.text = text
    
    def task_four(self, artists, capacities, attendances, revenues):
        """
        Creates a nested dictionary with various calculated values for each artist.
    
        Parameters:
        artists (list): List of artist names.
        capacities (list): List of venue capacities.
        attendances (list): List of concert attendances.
        revenues (list): List of gross ticket revenues.
    
        Returns:
        dict: A nested dictionary with calculated values for each artist.
        """
    
        concert_info = {}
        for artist, capacity, attendance, revenue in zip(artists, capacities, attendances, revenues):
            concert_info[artist] = {
                'average_ticket_price': f"${revenue / attendance:.2f}",
                'multi_word_name': len(artist.split()) > 1,
                'venue_fill_percentage': f"{(attendance / capacity) * 100:.2f}%"
            }
    
        return concert_info

# Re-initializing the class
analysis = ConcertAnalysis(concert_text)

# Executing task four
concert_info = analysis.task_four(artists, capacities, attendances, revenues)

concert_info


{'Black Stone Cherry': {'average_ticket_price': '$21.67',
  'multi_word_name': True,
  'venue_fill_percentage': '87.67%'},
 'Lady Gaga': {'average_ticket_price': '$18.96',
  'multi_word_name': True,
  'venue_fill_percentage': '97.47%'},
 'Paramore': {'average_ticket_price': '$50.00',
  'multi_word_name': False,
  'venue_fill_percentage': '100.00%'},
 'Rage Against the Machine': {'average_ticket_price': '$67.16',
  'multi_word_name': True,
  'venue_fill_percentage': '89.85%'}}

### Task 5: Correcting a Regular Expression
#### Approach
In this task, a regular expression is given that fails to match a specific text string. Our goal is to correct this regular expression and explain why the initial one failed.

The given text string is 'FIdD1E7h=' and the regular expression to match is '\D[a-zA-Z]*[^,]='

In [9]:
class ConcertAnalysis:
    def __init__(self, text=None):
        self.text = text
    
    
    def task_five(self, text="FIdD1E7h="):
        """
        Matches a string with a corrected regular expression.
        
        Parameters:
        text (str): The text to match against the regular expression. Default is "FIdD1E7h=".
        
        Returns:
        str: The matched string or "No match" if no match is found.
        """
        
        # Corrected regular expression
        regex = r"\S+[^,]="
        
        match = re.search(regex, text)
        return match.group() if match else "No match"

# Re-initializing the class
analysis = ConcertAnalysis()

# Executing task five with the corrected regex
match_result = analysis.task_five()

match_result


'FIdD1E7h='

### Task 6: Extracting an Adverb from a Sentence
#### Approach
In this task, we aim to extract an adverb from a sentence. The adverb in question ends with "ly." The given regular expression "$*\s+ly\w+" fails to achieve this. We will correct this regular expression to successfully extract the adverb and explain why the initial expression didn’t work.

In [10]:
class ConcertAnalysis:
    def __init__(self, text=None):
        self.text = text
    
    def task_six(self, text="The spy was carefully disguised"):
        """
        Extracts an adverb from a sentence.
        
        Parameters:
        text (str): The sentence to extract the adverb from. Default is "The spy was carefully disguised".
        
        Returns:
        str: The extracted adverb or "No match" if no adverb is found.
        """
        
        # Corrected regular expression to match words ending with 'ly'
        regex = r"\b\w+ly\b"
        
        match = re.search(regex, text)
        return match.group() if match else "No match"

# Re-initializing the class
analysis = ConcertAnalysis()

# Executing task six with the corrected regex
adverb = analysis.task_six()

adverb


'carefully'