In [None]:
import requests

In [2]:
import pandas as pd

I chose this dataset on programming languages to explore their history, paradigms, and popularity. The goal is to see when different languages were created, how they are used, and how popular they are today. By analyzing this, I hope to understand trends in programming and what makes some languages more widely adopted than others.

In [None]:
# list of programming languages representing both old and modern trends

languages = ["Python", "JavaScript", "Java", "C++", "C#", 
             "Go", "Rust", "TypeScript", "Swift", "Kotlin", 
             "PHP", "Ruby", "Assembly", "SQL", "MATLAB", 
             "R", "Scala", "Perl", "Lua", "Objective-C"]

In [None]:
# year in which the programming languages were introduced, this will help me explore interesting insights
# - Do newer languages have fewer StackOverflow questions since they are younger?
# - Are older languages still widely used or declining?

years = [1991, 1995, 1995, 1985, 2000,
         2009, 2010, 2012, 2014, 2011,
         1995, 1995, 1949, 1974, 1984,
         1993, 2003, 1987, 1993, 1984]

In [None]:
# defines a list of programming paradigms associated with each language

paradigms = ["OOP, Functional", "OOP, Functional", "OOP", "OOP, Procedural", "OOP",
             "Concurrent, Procedural", "Systems, Memory-Safe", "OOP, Functional", "OOP", "OOP",
             "Scripting, OOP", "Scripting, OOP", "Low-level", "Query Language", "Numeric",
             "Statistical", "Functional, OOP", "Scripting", "Scripting", "OOP"]

In [None]:
# initialized to store StackOverflow tag counts

tag_counts = []

In [None]:
# loops over each programming language and queries the Stack Overflow API
# for each language, it looks up its tag and retrieves the number of questions posted
# if found, the count is saved; otherwise, None is added
# this forms the popularity measure of each language

for lang in languages:
    url = "https://api.stackexchange.com/2.3/tags"
    params = {
        "order": "desc",
        "sort": "popular",
        "inname": lang.lower(),
        "site": "stackoverflow"
    }
    resp = requests.get(url, params=params)
    data = resp.json()

    if "items" in data and len(data["items"]) > 0:
        count = data["items"][0]["count"]  # takes the most popular tag
    else:
        count = None
    tag_counts.append(count)


In [None]:
# ensures the list of tag counts is exactly the same length as the languages list

tag_counts = tag_counts[:20]

In [None]:
# double-checks that the tag_counts list has 20 entries
# if the API didn’t return enough values, it fills the missing spots with None

while len(tag_counts) < 20:
    tag_counts.append(None)

In [None]:
# pandas DataFrame that combines, langugages, thier introduction years, paradigms and tag counts

df = pd.DataFrame({
    "Language": languages,
    "SO_Tag_Count": tag_counts,
    "YearIntroduced": years,
    "Paradigm": paradigms
})

In [None]:
# selects first 20 rows

df = df.head(20)
df

In [49]:
df.to_csv("programming_languages_popularity_analysis.csv", index=False)
df

Unnamed: 0,Language,SO_Tag_Count,YearIntroduced,Paradigm
0,Python,2222218,1991,"OOP, Functional"
1,JavaScript,2535083,1995,"OOP, Functional"
2,Java,2535083,1995,OOP
3,C++,818031,1985,"OOP, Procedural"
4,C#,1627703,2000,OOP
5,Go,312696,2009,"Concurrent, Procedural"
6,Rust,44273,2010,"Systems, Memory-Safe"
7,TypeScript,236476,2012,"OOP, Functional"
8,Swift,337251,2014,OOP
9,Kotlin,99023,2011,OOP
