In [None]:
#imports
import os
import csv
import requests
import bs4
import subprocess
import re

### URL -> needs to be changed to take input from the user

In [None]:
# Android API level whose diff report is scraped; change this one value to target another level.
API_LEVEL = 33
# Site-relative prefix shared by every link of interest on the index page.
CHANGES_PREFIX = f"/sdk/api_diff/{API_LEVEL}/changes/"
url = f"https://developer.android.com{CHANGES_PREFIX}alldiffs_index_changes"  # URL from android website - only changes

# Request the index page and build the Beautiful Soup tree from the response.
# A timeout avoids hanging forever, and raise_for_status() stops the cell on an
# HTTP error instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = bs4.BeautifulSoup(response.content, "html.parser")

# Collect the href of every anchor that points below the changes prefix.
links = [
    a["href"]
    for a in soup.find_all("a")
    if a.has_attr("href") and a["href"].startswith(CHANGES_PREFIX)
]

# Write one row per link. Only the last path segment of each link is stored,
# so the file holds the class entries themselves rather than full links.
# The context manager guarantees the rows are flushed and the file is closed
# before later cells read "classes.csv".
with open("classes.csv", "w", newline="") as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=["Package Class"])
    csv_writer.writeheader()
    for link in links:
        css_class = link.split('/')[-1]
        csv_writer.writerow({"Package Class": css_class})


#JSON not required
#json_file = open("classes.json", "w")
#json_data = {"links": links}
#json.dump(json_data, json_file)

### Takes the classes.csv file and strips unnecessary info

In [None]:
def modify_csv(csv_path, output_path="modified_csv.csv"):
    """Clean the "Package Class" column of a CSV and write it to a new file.

    For every row the value is cut at the first "#", leading dots are removed,
    surrounding "[" / "]" characters are stripped, and trailing dots are
    removed (in that order).

    Args:
        csv_path: Path of the input CSV; it must have a "Package Class" header.
        output_path: Path of the CSV to write. Defaults to "modified_csv.csv",
            the name used before this parameter existed.

    Raises:
        KeyError: If the input has no "Package Class" column.
    """
    field = "Package Class"
    # newline="" is what the csv module expects for both reading and writing;
    # without it, extra blank rows appear on platforms that translate "\n".
    with open(csv_path, "r", newline="") as src, open(output_path, "w", newline="") as dst:
        reader = csv.DictReader(src)
        writer = csv.DictWriter(dst, fieldnames=[field])
        writer.writeheader()
        for row in reader:
            # A short row yields None for the column; treat it as empty text.
            class_name = row[field] or ""
            class_name = class_name.split("#")[0]
            class_name = class_name.lstrip(".").strip("[]").rstrip(".")
            # Write only the cleaned column: passing the reader's row through
            # would make DictWriter raise when the row carries extra fields.
            writer.writerow({field: class_name})

if __name__ == "__main__":
    # Clean the scraped "classes.csv" when this cell/script is run directly.
    csv_path = "classes.csv"
    modify_csv(csv_path=csv_path)

### takes the CSV and compiles it into a unique list

In [None]:
# Collect the distinct class names from the cleaned CSV.
# DictReader parses quoted fields correctly and consumes the header row, so
# the literal "Package Class" no longer ends up in the set. Empty names are
# skipped because an empty string is a substring of every file searched later.
with open("modified_csv.csv", "r", newline="") as f:
    classes = set()
    for row in csv.DictReader(f):
        class_name = (row.get("Package Class") or "").strip()
        if class_name:
            classes.add(class_name)

# Write one name per row; sorting keeps the file identical between runs
# (plain set iteration order is not stable across interpreter sessions).
with open("unique_classes.csv", "w", newline="") as fw:
    writer = csv.writer(fw)
    for class_name in sorted(classes):
        writer.writerow([class_name])

### Only retain the last part

In [None]:
def simplify(csv_file, output_file):
    """Reduce every CSV field to the text after its last "." and save the result.

    Fields without a "." are copied unchanged. Blank rows in the input are
    dropped instead of being copied to the output.

    Args:
        csv_file: Path of the CSV to read.
        output_file: Path of the CSV to write.
    """
    # newline="" lets the csv module handle line endings itself on every platform.
    with open(csv_file, "r", newline="") as f:
        lines = [
            # rsplit(...)[-1] is the whole item when it contains no ".".
            [item.rsplit(".", 1)[-1] for item in row]
            for row in csv.reader(f)
            if row
        ]

    with open(output_file, "w", newline="") as f:
        csv.writer(f).writerows(lines)


if __name__ == "__main__":
    # Bind the input and output names first so both remain available as globals.
    csv_file, output_file = "unique_classes.csv", "simplify_classes.csv"
    simplify(csv_file=csv_file, output_file=output_file)


### finds matching classes and appends them into a CSV

In [None]:
def match_csv_java(directory, csv_file):
    """Record which .java files under `directory` mention each listed class name.

    The first column of `csv_file` is read as the list of names. For every
    (name, java file) pair where the name occurs as a plain substring of the
    file's text, a row `[name, java_file_path]` is appended to "output.csv"
    in the current working directory.

    Args:
        directory: Root folder that is walked recursively for ".java" files.
        csv_file: CSV whose first column holds the names. It is looked up
            relative to `directory` first, then as given.

    Returns:
        None. When the CSV cannot be found a message is printed and nothing
        is written.
    """
    # Gather every .java file below the root directory.
    java_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.endswith(".java")
    ]

    # Check the same path that is opened: previously existence was tested on
    # `csv_file` but `os.path.join(directory, csv_file)` was the file read.
    csv_path = os.path.join(directory, csv_file)
    if not os.path.exists(csv_path):
        csv_path = csv_file
    if not os.path.exists(csv_path):
        print(f"match_csv_java: class list {csv_file!r} not found; nothing written")
        return

    with open(csv_path, "r", newline="") as f:
        # Skip blank rows and empty names: "" is a substring of every file and
        # would otherwise produce a match for each Java source.
        class_names = [row[0] for row in csv.reader(f) if row and row[0]]
    if not class_names:
        return

    # Read every source file once instead of once per class name.
    # errors="replace" keeps a single badly encoded file from aborting the scan.
    sources = []
    for java_file in java_files:
        with open(java_file, "r", encoding="utf-8", errors="replace") as src:
            sources.append((java_file, src.read()))

    # Same ordering as before: names in CSV order, files in walk order.
    # NOTE(review): this is a substring test, so a short name also matches
    # inside longer identifiers - confirm that is acceptable.
    matches = [
        [class_name, java_file]
        for class_name in class_names
        for java_file, text in sources
        if class_name in text
    ]

    # Append mode is kept, so rows accumulate across runs; the file is only
    # touched when there is something to add.
    if matches:
        with open("output.csv", "a", newline="") as csvfile:
            csv.writer(csvfile).writerows(matches)


if __name__ == "__main__":
    # NOTE(review): both arguments are absolute paths on one particular machine;
    # they presumably need adjusting before running elsewhere - confirm.
    match_csv_java(
        directory="/Users/nvsr/Downloads/e-mission-phone-nvsr-iter1-API_Migration_aid_script_patch-1/plugins",
        csv_file="/Users/nvsr/Downloads/e-mission-phone-nvsr-iter1-API_Migration_aid_script_patch-1/bin/API_Migration_scripts_readme/simplify_classes.csv",
    )


### Removes the 2nd column, which holds the file path, and creates a unique list

In [None]:
def get_unique_packages(csv_file, output_file="unique_packages.csv"):
    """Write the distinct first-column values of a CSV, one per row.

    Blank rows are ignored and every other column is discarded. Values are
    written in sorted order so that repeated runs produce an identical file.

    Args:
        csv_file: Path of the CSV to read.
        output_file: Path of the CSV to write. Defaults to
            "unique_packages.csv", the name used before this parameter existed.
    """
    # newline="" lets the csv module manage line endings on every platform.
    with open(csv_file, "r", newline="") as f:
        packages = {row[0] for row in csv.reader(f) if row}

    with open(output_file, "w", newline="") as f:
        csv.writer(f).writerows([package] for package in sorted(packages))

if __name__ == "__main__":
    # De-duplicate the first column of "output.csv" when run directly.
    get_unique_packages(csv_file="output.csv")

# Now the scraping process is repeated, but this time the links themselves are saved and then used for further scraping

### Scrape links

In [None]:
def get_links(url):
    """Return the href of every anchor tag on the page at `url`.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        A list of href strings in document order. When the server answers with
        anything other than HTTP 200, a message is printed and an empty list is
        returned (previously the function fell through and returned None, which
        made callers that iterate over the result fail).
    """
    # A timeout keeps an unresponsive server from blocking the cell forever.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"get_links: GET {url} returned HTTP {response.status_code}; no links collected")
        return []

    soup = bs4.BeautifulSoup(response.content, "html.parser")
    return [a["href"] for a in soup.find_all("a") if a.has_attr("href")]

def main():
    """Scrape the API-diff index page and store each href as one row of links.csv."""
    index_url = "https://developer.android.com/sdk/api_diff/33/changes/alldiffs_index_changes"
    hrefs = get_links(index_url)

    # One single-column row per link.
    with open("links.csv", "w", newline="") as out:
        csv.writer(out).writerows([href] for href in hrefs)

if __name__ == "__main__":
    main()

### take the above CSV and simplify them

In [None]:
def filter_links(csv_file, output_file):
    """Copy only the rows whose first field starts with "/sdk" to a new CSV.

    Blank rows are skipped rather than raising IndexError.

    Args:
        csv_file: Path of the CSV to read.
        output_file: Path of the CSV to write.
    """
    # newline="" lets the csv module manage line endings on every platform.
    with open(csv_file, "r", newline="") as f:
        links = [
            row
            for row in csv.reader(f)
            # `row and ...` guards the index: csv.reader yields [] for a blank line.
            if row and row[0].startswith("/sdk")
        ]

    with open(output_file, "w", newline="") as f:
        csv.writer(f).writerows(links)


if __name__ == "__main__":
    # Keep only the "/sdk" links from the scraped list when run directly.
    filter_links(csv_file="links.csv", output_file="simplified_links.csv")

### Gets the methods

In [None]:
def get_contents(url):
    """Download a developer.android.com page and return the text of its tables.

    Args:
        url: Site-relative path; it is appended to "https://developer.android.com".

    Returns:
        A list with one entry per <table> found on the page. Each entry is a
        list of rows (<tr>), and each row is a list with the text of its <td>
        cells. When the server answers with anything other than HTTP 200, a
        message is printed and an empty list is returned (previously the
        function fell through and returned None).
    """
    # A timeout keeps an unresponsive server from blocking the cell forever.
    response = requests.get(f"https://developer.android.com{url}", timeout=30)

    if response.status_code != 200:
        print(f"get_contents: GET {url} returned HTTP {response.status_code}; no tables collected")
        return []

    soup = bs4.BeautifulSoup(response.content, "html.parser")

    # tables -> rows -> cell texts
    return [
        [[cell.text for cell in row.find_all("td")] for row in table.find_all("tr")]
        for table in soup.find_all("table")
    ]

def main():
    """Scrape the tables behind every link in simplified_links.csv.

    For each link, one row is appended to "methods_scraped.csv": the link
    followed by one field per table returned by get_contents (each table is
    written as the text form of its nested list).

    Note: a main() defined earlier in this file is replaced by this definition
    once this code runs.
    """
    with open("simplified_links.csv", "r", newline="") as f:
        # Blank rows carry no link; skipping them avoids an IndexError.
        links = [row[0] for row in csv.reader(f) if row]

    # The output is opened once rather than once per link. Append mode is kept,
    # so running this again adds rows to an existing file instead of replacing it.
    with open("methods_scraped.csv", "a", newline="") as f:
        csvwriter = csv.writer(f)
        for link in links:
            # `or []` tolerates a get_contents that yields None for a failed request.
            contents = get_contents(link) or []
            csvwriter.writerow([link] + contents)

if __name__ == "__main__":
    main()


### Removes unnecessary elements

In [None]:
def remove_columns(csv_file, output_file, column_numbers):
    """Copy a CSV while dropping the columns at the given zero-based positions.

    Rows shorter than a listed position are simply left as they are.

    Args:
        csv_file: Path of the CSV to read.
        output_file: Path of the CSV to write.
        column_numbers: Iterable of zero-based column indexes to leave out.
    """
    # Materialise once: gives constant-time membership tests and also works
    # when the caller passes a one-shot iterable such as a generator.
    drop = set(column_numbers)

    # newline="" lets the csv module manage line endings on every platform.
    with open(csv_file, "r", newline="") as f:
        rows = [
            [value for index, value in enumerate(row) if index not in drop]
            for row in csv.reader(f)
        ]

    with open(output_file, "w", newline="") as f:
        csv.writer(f).writerows(rows)


# Drop columns 1 and 2 of the scraped rows and save the remainder as "methods.csv".
remove_columns(
    csv_file="methods_scraped.csv",
    output_file="methods.csv",
    column_numbers=[1, 2],
)