# **1. Library Installation**

In [1]:
!pip install beautifulsoup4
!pip install pandas
!pip install tabulate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **2. Data Retrieval and Storage**

In [2]:
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Notebook-wide settings
WIKI_URL: str = "https://en.wikipedia.org/wiki/Comparison_of_programming_languages"  # page that is downloaded
CSV_FILE: str = "prog_lang_comparison.csv"  # file the scraped table is written to and read from
TABLE_CLASS: str = "wikitable"  # class attribute used to locate the <table> element

def extract_data(url: str, file_name: str, strip_cells: bool = False) -> None:
    """Download a web page and save its first matching table as a CSV file.

    The page at ``url`` is parsed with BeautifulSoup's ``html.parser``. The
    first ``<table>`` whose class matches ``TABLE_CLASS`` is selected, and one
    CSV row is written per ``<tr>``, with one field per ``<td>``/``<th>`` cell.

    Problems are reported with ``print`` instead of being raised: if the page
    cannot be fetched, or no matching table is present, the function returns
    before ``file_name`` is opened, so an existing file is left untouched.

    Args:
        url: Address of the page to download.
        file_name: Path of the CSV file to create or overwrite.
        strip_cells: When True, leading/trailing whitespace is removed from
            every cell. The default (False) writes the text exactly as
            ``get_text()`` returns it, which can include a trailing newline
            (and therefore header names such as ``'Language\\n'``).

    Returns:
        None. The result is the CSV file written to ``file_name``.
    """
    try:
        # Read the body inside the ``with`` block so the HTTP response is
        # closed deterministically instead of being left open.
        with urlopen(url) as response:
            html = response.read()
    except Exception as e:
        print(f"Error accessing URL: {e}")
        return

    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", class_=TABLE_CLASS)
    # ``find`` returns None when nothing matches; test for that explicitly
    # rather than relying on the truthiness of a Tag object.
    if table is None:
        print(f"No table found with class '{TABLE_CLASS}' on page {url}")
        return

    # Explicit UTF-8 keeps non-ASCII characters intact on every platform and
    # matches the default encoding pandas.read_csv uses to read the file back.
    with open(file_name, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        for row in table.find_all("tr"):
            texts = [cell.get_text() for cell in row.find_all(["td", "th"])]
            if strip_cells:
                texts = [text.strip() for text in texts]
            writer.writerow(texts)
    print(f"Data extracted successfully and stored in file '{file_name}'.")
    
# Run the scraper: fetch the page at WIKI_URL and save its table to CSV_FILE
extract_data(url=WIKI_URL, file_name=CSV_FILE)

Data extracted successfully and stored in file 'prog_lang_comparison.csv'.


# **3. Data Reading and Manipulation**

In [3]:
import pandas as pd

# Load the scraped table back from disk into a DataFrame
df = pd.read_csv(CSV_FILE)

# Report the frame's dimensions; DataFrame.shape is (rows, columns)
row_count, column_count = df.shape
print(f"The file contains {column_count} columns and {row_count} rows.")

# List the header names exactly as pandas parsed them
print("The columns of the file are as follows:")
print(df.columns.tolist())

The file contains 11 columns and 138 rows.
The columns of the file are as follows:
['Language\n', 'Original purpose\n', 'Imperative\n', 'Object-oriented\n', 'Functional\n', 'Procedural\n', 'Generic\n', 'Reflective\n', 'Event-driven\n', 'Other paradigms\n', 'Standardized?\n']


# **4. Data Visualization**

In [4]:
from tabulate import tabulate

# Helper that renders a DataFrame as a text table
def display_table(df):
    """
    Print a DataFrame as a bordered text table.

    The table is produced by tabulate in its 'psql' format, with the
    DataFrame's column names as headers.

    Side effect: pandas' global ``display.max_columns`` and
    ``display.max_rows`` options are set to None (no limit) and are not
    restored afterwards, so they stay changed after the call.
    """
    for option_name in ("display.max_columns", "display.max_rows"):
        pd.set_option(option_name, None)
    rendered = tabulate(df, headers='keys', tablefmt='psql')
    print(rendered)

# Print a heading, then hand the whole DataFrame to the table helper
print("Here is the data as a table:")
display_table(df=df)

Here is the data as a table:
+-----+------------------------------------+--------------------------------------------------------------------------------------+--------------+----------------------------------------+------------------+--------------+---------------------------------------+--------------+----------------------------+---------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+
|     | Language                           | Original purpose                                                                     | Imperative   | Object-oriented                        | Functional       | Procedural   | Generic                               | Reflective   | Event-driven               | Other paradigms                                                                                       