In [9]:
import pandas as pd
import numpy as np
from itertools import combinations

from scipy.stats import mannwhitneyu, spearmanr, t, chisquare

from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
from matplotlib.patches import ConnectionPatch
from matplotlib.collections import PatchCollection
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import to_rgba
import matplotlib.patches as mpatches

sns.set_style('darkgrid')

In [7]:
from google.colab import files
uploaded = files.upload()

Saving all_data.csv to all_data (1).csv


## Data cleaning

In [10]:
df = pd.read_csv('all_data.csv')


# Filter out non-consents and people who indicating spending no time working on AI systems
df['Screener'] = pd.to_numeric(df.Screener, errors = 'coerce')
df = df[(df.Consent == 1) & (df.Screener > 0)]

# Drop unneeded columns
df.drop(columns=['StartDate', 'EndDate', 'Status', 'RecordedDate', 'UserLanguage', 'sig_year_5_TEXT',
                 'comp_year_5_TEXT', 'Consent', 'more_thoughts'], inplace=True)

# Ignore "I don't know" responses for cost and GPU-hours
df['CompCost'] = df.CompCost.apply(lambda x: pd.NA if x == "6" else x)
df['CompGPUs'] = df.CompGPUs.apply(lambda x: pd.NA if x == "7" else x)
df['SigCost'] = df.SigCost.apply(lambda x: pd.NA if x == "6" else x)
df['SigGPUs'] = df.SigGPUs.apply(lambda x: pd.NA if x == "7" else x)

# All columns currently have numeric data but are recorded as strings -- convert them all
for column in df.columns[5:]:
    df[column] = pd.to_numeric(df[column], errors = 'coerce')

# Set the values for "sector" to their text meanings
sector_values = {0: 'Academia', 1: 'Industry', 2: 'Government', 3: 'Other'}
df['Sector'] = df.Sector.apply(lambda x: sector_values[x] if x in sector_values else pd.NA)

# Set the values for "company size" to their text meanings
size_values = {0: '<50 Employees', 1: '50-100 Employees', 2: '101-500 Employees', 3: '>500 Employees'}
df['CompanySize'] = df.CompanySize.apply(lambda x: size_values[x] if x in sector_values else pd.NA)

# Create a new column called "PrioritySimplified" and set values for "TopPriority" to text values
priorities = {0: 'Collecting more data', 1: 'Refining or cleaning data', 2: 'Purchasing more or higher-quality compute',
             3: 'Hiring more programmers or engineers', 4: 'Hiring researchers', 5: 'Doing more evaluation or testing'}
df['TopPriority'] = df.TopPriority.map(priorities)
simplified = {'Collecting more data': 'Data', 'Refining or cleaning data': 'Data',
             'Hiring researchers': 'Talent', 'Hiring more programmers or engineers': 'Talent',
             'Purchasing more or higher-quality compute': 'Compute',
             'Doing more evaluation or testing': 'Evaluation'}
df['PrioritySimplified'] = df.TopPriority.map(simplified)

# If the most significant project was also the most compute-intensive, duplicate the answers to both columns
df['CompCost'] = df.apply(lambda x: x['CompCost'] if x['SameProject'] == 0. else x['SigCost'], axis=1)
df['CompGPUs'] = df.apply(lambda x: x['CompGPUs'] if x['SameProject'] == 0. else x['SigGPUs'], axis=1)
df['CompTeamSize'] = df.apply(lambda x: x['CompTeamSize'] if x['SameProject'] == 0. else x['SigTeamSize'], axis=1)
df['CompYear'] = df.apply(lambda x: x['CompYear'] if x['SameProject'] == 0. else x['SigYear'], axis=1)
df['CompComponent'] = df.apply(lambda x: x['CompComponent'] if x['SameProject'] == 0. else x['SigComponent'], axis=1)

# Create separate dfs for different subsets of data
all_responses = df.copy()
no_snowball = df[df.DistributionChannel == 'email'].copy()
completed = df[df.Finished == 1].copy()

  df['PrioritySimplified'] = df.TopPriority.map(simplified)


## Identify which data version we're using

In [11]:
version = no_snowball
# version = all_responses
# version = completed

## High Compute Users

In [12]:
# How many high compute users are there, defined by GPUs
high_compute_users = version[version.CompGPUs >= 4]
print(f"There are {len(high_compute_users)} high compute users (>5000 GPU hours).")
low_compute_users = version[version.CompGPUs <= 1]
print(f"There are {len(low_compute_users)} low compute users (<=50 GPU hours).")
print()
for want in ["WantsCompute", "WantsData", "WantsStaff", "WantsGrants", "WantsStandards"]:
  print(f"{len(high_compute_users[high_compute_users[want] == 1])} high compute users {want} and {len(low_compute_users[low_compute_users[want] == 1])} low compute users {want}")
  print(f"{(len(high_compute_users[high_compute_users[want] == 1])/len(high_compute_users))*100:.2f}% of high compute users {want} and {(len(low_compute_users[low_compute_users[want] == 1])/len(low_compute_users))*100:.2f}% of low compute users {want}")
  print()
print("-----\n")
for field in ["CV", "NLP", "Robotics", "RL", "Other"]:
  print(f"The # of high compute users in {field} is {len(high_compute_users[high_compute_users[field] == 1])} and the # of low compute users is {len(low_compute_users[low_compute_users[field] == 1])}")
  print(f"The % of high compute users in {field} is {(len(high_compute_users[high_compute_users[field] == 1])/len(high_compute_users))*100:.2f}% and the % of low compute users is {(len(low_compute_users[low_compute_users[field] == 1])/len(low_compute_users))*100:.2f}%")
  print()
print("-----\n")
for subfield in ["RecommenderSystems", "Speech", "TimeSeriesData", "MusicAndAudio", "GraphAnalysis", "AlgorithmicOrArchitectureAnalysis", "NoneofThese"]:
  print(f"The # of high compute users in {subfield} is {len(high_compute_users[high_compute_users[subfield] == 1])} and the # of low compute users is {len(low_compute_users[low_compute_users[subfield] == 1])}")
  print(f"The % of high compute users in {subfield} is {(len(high_compute_users[high_compute_users[subfield] == 1])/len(high_compute_users))*100:.2f}% and the % of low compute users is {(len(low_compute_users[low_compute_users[subfield] == 1])/len(low_compute_users))*100:.2f}%")
  print()
print("-----\n")
for sector in ["Academia", "Industry"]:
  print(f"The # of high compute users in {sector} is {len(high_compute_users[high_compute_users.Sector == sector])} and the # of low computer users is {len(low_compute_users[low_compute_users.Sector == sector])}")
  print(f"The % of high compute users in {sector} is {(len(high_compute_users[high_compute_users.Sector == sector])/len(version[version.Sector == sector]))*100:.2f}% and the % of low compute users is {(len(low_compute_users[low_compute_users.Sector == sector])/len(version[version.Sector == sector]))*100:.2f}%")
  print(f"The % of {sector} respondents among high compute users is {(len(high_compute_users[high_compute_users.Sector == sector])/len(high_compute_users))*100:.2f}% and the % of {sector} respondents among low compute users is {(len(low_compute_users[low_compute_users.Sector == sector])/len(low_compute_users))*100:.2f}%")
print("\n-----\n")
highest_compute_users = version[version.CompGPUs >= 6]
for sector in ["Academia", "Industry"]:
  print(f"The # of highest compute users in {sector} is {len(highest_compute_users[highest_compute_users.Sector == sector])}")
  print(f"The % of {sector} respondents among highest compute users is {(len(highest_compute_users[highest_compute_users.Sector == sector])/len(highest_compute_users))*100:.2f}%")


There are 88 high compute users (>5000 GPU hours).
There are 70 low compute users (<=50 GPU hours).

64 high compute users WantsCompute and 37 low compute users WantsCompute
72.73% of high compute users WantsCompute and 52.86% of low compute users WantsCompute

47 high compute users WantsData and 32 low compute users WantsData
53.41% of high compute users WantsData and 45.71% of low compute users WantsData

30 high compute users WantsStaff and 21 low compute users WantsStaff
34.09% of high compute users WantsStaff and 30.00% of low compute users WantsStaff

64 high compute users WantsGrants and 52 low compute users WantsGrants
72.73% of high compute users WantsGrants and 74.29% of low compute users WantsGrants

26 high compute users WantsStandards and 20 low compute users WantsStandards
29.55% of high compute users WantsStandards and 28.57% of low compute users WantsStandards

-----

The # of high compute users in CV is 40 and the # of low compute users is 5
The % of high compute users

In [None]:
# How many high compute users are there, defined by cost
high_compute_cost_users = version[version.CompCost >= 4]
print(f"There are {len(high_compute_cost_users)} high compute users (>$100,000).")
low_compute_cost_users = version[version.CompCost <= 1]
print(f"There are {len(low_compute_cost_users)} low compute users (<=$1000).")
print()
for want in ["WantsCompute", "WantsData", "WantsStaff", "WantsGrants", "WantsStandards"]:
  print(f"{len(high_compute_cost_users[high_compute_cost_users[want] == 1])} high compute users {want} and {len(low_compute_cost_users[low_compute_cost_users[want] == 1])} low compute users {want}")
  print(f"{(len(high_compute_cost_users[high_compute_cost_users[want] == 1])/len(high_compute_cost_users))*100:.2f}% of high compute users {want} and {(len(low_compute_cost_users[low_compute_cost_users[want] == 1])/len(low_compute_cost_users))*100:.2f}% of low compute users {want}")

  print()

There are 45 high compute users (>$100,000).
There are 57 low compute users (<=$1000).

32 high compute users WantsCompute and 29 low compute users WantsCompute
71.11% of high compute users WantsCompute and 50.88% of low compute users WantsCompute

25 high compute users WantsData and 28 low compute users WantsData
55.56% of high compute users WantsData and 49.12% of low compute users WantsData

17 high compute users WantsStaff and 18 low compute users WantsStaff
37.78% of high compute users WantsStaff and 31.58% of low compute users WantsStaff

29 high compute users WantsGrants and 43 low compute users WantsGrants
64.44% of high compute users WantsGrants and 75.44% of low compute users WantsGrants

16 high compute users WantsStandards and 15 low compute users WantsStandards
35.56% of high compute users WantsStandards and 26.32% of low compute users WantsStandards



In [14]:
# high and low compute user preferences
for priority in ["Compute", "Talent", "Data"]:
  print(f"The # of high compute users who would prioritize {priority} is {len(high_compute_users[high_compute_users.PrioritySimplified == priority])} and the # of low compute users is {len(low_compute_users[low_compute_users.PrioritySimplified == priority])}")
print("\n-----\n")
for success in ["SuccessData", "SuccessTeamSize", "SuccessTalent", "SuccessCompute"]:
  print(f"The # of high compute users who attribute their success to {success[7:].lower()} is {len(high_compute_users[high_compute_users[success] >= 3])} and the # of low compute users is {len(low_compute_users[low_compute_users[success] >= 3])}")
print("\n-----\n")
for i, concern_level in enumerate(["not at all concerned", "slightly_concerned", "somewhat concerned", "moderately concerned", "extremely concerned"]):
  print(f"The # of high compute users whose concern level is {concern_level.replace('_', ' ')} is {len(high_compute_users[high_compute_users.ContributionConcern == i])} and the # of low compute users is {len(low_compute_users[low_compute_users.ContributionConcern == i])}")
  print(f"The % of high compute users whose concern level is {concern_level.replace('_', ' ')} is {(len(high_compute_users[high_compute_users.ContributionConcern == i])/len(high_compute_users))*100:.2f}% and the % of low compute users is {(len(low_compute_users[low_compute_users.ContributionConcern == i])/len(low_compute_users))*100:.2f}%")
  print()

The # of high compute users who would prioritize Compute is 23 and the # of low compute users is 7
The # of high compute users who would prioritize Talent is 46 and the # of low compute users is 39
The # of high compute users who would prioritize Data is 15 and the # of low compute users is 14

-----

The # of high compute users who attribute their success to data is 41 and the # of low compute users is 24
The # of high compute users who attribute their success to teamsize is 19 and the # of low compute users is 7
The # of high compute users who attribute their success to talent is 79 and the # of low compute users is 60
The # of high compute users who attribute their success to compute is 60 and the # of low compute users is 27

-----

The # of high compute users whose concern level is not at all concerned is 13 and the # of low compute users is 25
The % of high compute users whose concern level is not at all concerned is 14.77% and the % of low compute users is 35.71%

The # of high 

## Language Modeling

In [None]:
nlpers = version[version.NLP == 1]
language_modelers = version[version.LanguageModeling == 1]
non_model_nlpers = version[(version.LanguageModeling) == 0 & (version.NLP == 1)]
print(f"There are {len(nlpers)} NLP respondents.")
print(f"There are {len(language_modelers)} language modeling respondents.")
print(f"There are {len(language_modelers)} NLP respondents who report that they do not do language modeling.")
print("\n-----\n")

for sector in ["Academia", "Industry"]:
  print(f"The # of language modelers in {sector} is {len(language_modelers[language_modelers.Sector == sector])} while the # of NLP respondents is {len(nlpers[nlpers.Sector == sector])} and the # of non-language-model NLPers is {len(non_model_nlpers[non_model_nlpers.Sector == sector])}")
  print(f"The % of {sector} respondents among language modelers is {(len(language_modelers[language_modelers.Sector == sector])/len(language_modelers))*100:.2f}% while the % among NLP respondents is {(len(nlpers[nlpers.Sector == sector])/len(nlpers))*100:.2f}%")
  print()
print("-----\n")
for i, computeuse in enumerate(["no", "50 or fewer", "51-500", "501-5000", "5001-50,000", "50,001-500,000", "more than 500,000", "an unknown number of"]):
  print(f"The # of language modelers who report using {computeuse} GPUs is {len(language_modelers[language_modelers.CompGPUs == i])} while the # of NLP respondents is {len(nlpers[nlpers.CompGPUs == i])} and the # of non-language-model NLPers is {len(non_model_nlpers[non_model_nlpers.CompGPUs == i])}")
  print(f"The % of language modelers who report using {computeuse} GPUs is {(len(language_modelers[language_modelers.CompGPUs == i])/len(language_modelers))*100:.2f}% while the % of NLP respondents is {(len(nlpers[nlpers.CompGPUs == i])/len(nlpers))*100:.2f}%")
  print()
print("-----\n")
for i, computeuse in enumerate(["$0", "$1-$1000", "$1,001-$10,000", "$10,001-$100,000", "$100,001-$1,000,000", "more than $1,000,000", "an unknown amount of"]):
  print(f"The # of language modelers who report using {computeuse} money is {len(language_modelers[language_modelers.CompCost == i])} while the # of NLP respondents is {len(nlpers[nlpers.CompCost == i])} and the # of non-language-model NLPers is {len(non_model_nlpers[non_model_nlpers.CompCost == i])} ")
  print(f"The % of language modelers who report using {computeuse} money is {(len(language_modelers[language_modelers.CompCost == i])/len(language_modelers))*100:.2f}% while the % of NLP respondents is {(len(nlpers[nlpers.CompCost == i])/len(nlpers))*100:.2f}%")
  print()
print("-----\n")
for past in ["PastData", "PastCompute", "PastAlgorithms", "PastResearchers", "PastSupport"]:
  print(f"The # of language modelers who report {past[4:].lower()} as important to past progress is {len(language_modelers[language_modelers[past] == 4])} while the # of NLP respondents is {len(nlpers[nlpers[past] == 4])} and the # of non-language-model NLPers is {len(non_model_nlpers[non_model_nlpers[past] == 4])}")
  print(f"The % of language modelers who report {past[4:].lower()} as important to past progress is {(len(language_modelers[language_modelers[past] >= 3])/len(language_modelers))*100:.2f}% while the % of NLP respondents is {(len(nlpers[nlpers[past] >= 3])/len(nlpers))*100:.2f}%")
  print()
print("-----\n")
for future in ["FutureData", "FutureCompute", "FutureAlgorithms", "FutureResearchers", "FutureSupport"]:
  print(f"The # of language modelers who report {future[6:].lower()} as important to future progress is {len(language_modelers[language_modelers[future] == 4])} while the # of NLP respondents is {len(nlpers[nlpers[future] == 4])} and the # of non-language-model NLPers is {len(non_model_nlpers[non_model_nlpers[future] == 4])}")
  print(f"The % of language modelers who report {future[6:].lower()} as important to future progress is {(len(language_modelers[language_modelers[future] == 4])/len(language_modelers))*100:.2f}% while the % of NLP respondents is {(len(nlpers[nlpers[future] == 4])/len(nlpers))*100:.2f}%")
  print()

There are 143 NLP respondents.
There are 70 language modeling respondents.
There are 70 NLP respondents who report that they do not do language modeling.

-----

The # of language modelers in Academia is 40 while the # of NLP respondents is 83 and the # of non-language-model NLPers is 42
The % of Academia respondents among language modelers is 57.14% while the % among NLP respondents is 58.04%

The # of language modelers in Industry is 28 while the # of NLP respondents is 58 and the # of non-language-model NLPers is 28
The % of Industry respondents among language modelers is 40.00% while the % among NLP respondents is 40.56%

-----

The # of language modelers who report using no GPUs is 0 while the # of NLP respondents is 4 and the # of non-language-model NLPers is 4
The % of language modelers who report using no GPUs is 0.00% while the % of NLP respondents is 2.80%

The # of language modelers who report using 50 or fewer GPUs is 3 while the # of NLP respondents is 7 and the # of non-l

## Smaller companies

In [17]:
smaller_industry = version[((version.CompanySize == '<50 Employees') | (version.CompanySize == '50-100 Employees') | (version.CompanySize == '101-500 Employees')) & (version.Sector == "Industry")]
print(f"There are {len(smaller_industry)} smaller company respondents.")
print("\n-----\n")
for field in ["CV", "NLP", "Robotics", "RL", "Other"]:
  print(f"The # of smaller company respondents in {field} is {len(smaller_industry[smaller_industry [field] == 1])} and the # of all users is {len(version[version[field] == 1])}")
  print(f"The % of smaller company respondents in {field} is {(len(smaller_industry[smaller_industry [field] == 1])/len(smaller_industry))*100:.2f}% and the % of all users is {(len(version[version[field] == 1])/len(version))*100:.2f}%")
  print()
print("-----\n")
for priority in ["Compute", "Talent", "Data"]:
  print(f"The # of smaller company respondents who would prioritize {priority} is {len(smaller_industry[smaller_industry.PrioritySimplified == priority])} and the # of all users is {len(version[version.PrioritySimplified == priority])}")
  print(f"The % of smaller company respondents who would prioritize {priority} is {(len(smaller_industry[smaller_industry.PrioritySimplified == priority])/len(smaller_industry))*100:.2f}% and the % of all users is {(len(version[version.PrioritySimplified == priority])/len(version))*100:.2f}%")
  print()
print("-----\n")
for success in ["SuccessData", "SuccessTeamSize", "SuccessTalent", "SuccessCompute"]:
  print(f"The # of smaller company respondents who attribute their success to {success[7:].lower()} is {len(smaller_industry[smaller_industry[success] >= 3])} and the # of all users is {len(version[version[success] >= 3])}")
  print(f"The % of smaller company respondents who attribute their success to {success[7:].lower()} is {(len(smaller_industry[smaller_industry[success] >= 3])/len(smaller_industry))*100:.2f}% and the % of all users is {(len(version[version[success] >= 3])/len(version))*100:.2f}%")
  print()
print("-----\n")
for i, concern_level in enumerate(["not at all concerned", "slightly_concerned", "somewhat concerned", "moderately concerned", "extremely concerned"]):
  print(f"The # of smaller company respondents whose concern level is {concern_level.replace('_', ' ')} is {len(smaller_industry[smaller_industry.ContributionConcern == i])} and the # of all users is {len(version[version.ContributionConcern == i])}")
  print(f"The % of smaller company respondents whose concern level is {concern_level.replace('_', ' ')} is {(len(smaller_industry[smaller_industry.ContributionConcern == i])/len(smaller_industry))*100:.2f}% and the % of all users is {(len(version[version.ContributionConcern == i])/len(version))*100:.2f}%")
  print()
#concern with nonresponses dropped
print("-----\n")
print("With nonresponse dropped")
nonresponse_dropped = version.dropna(subset=["ContributionConcern"])
smaller_industry_dropped = nonresponse_dropped[(nonresponse_dropped.CompanySize == '<50 Employees') | (nonresponse_dropped.CompanySize == '50-100 Employees') | (nonresponse_dropped.CompanySize == '101-500 Employees')]
for i, concern_level in enumerate(["not at all concerned", "slightly_concerned", "somewhat concerned", "moderately concerned", "extremely concerned"]):
  print(f"The # of smaller company respondents whose concern level is {concern_level.replace('_', ' ')} is {len(smaller_industry_dropped[smaller_industry_dropped.ContributionConcern == i])} and the # of all users is {len(nonresponse_dropped[nonresponse_dropped.ContributionConcern == i])}")
  print(f"The % of smaller company respondents whose concern level is {concern_level.replace('_', ' ')} is {(len(smaller_industry_dropped[smaller_industry_dropped.ContributionConcern == i])/len(smaller_industry_dropped))*100:.2f}% and the % of all users is {(len(nonresponse_dropped[nonresponse_dropped.ContributionConcern == i])/len(nonresponse_dropped))*100:.2f}%")
  print()

There are 35 smaller company respondents.

-----

The # of smaller company respondents in CV is 14 and the # of all users is 150
The % of smaller company respondents in CV is 40.00% and the % of all users is 28.14%

The # of smaller company respondents in NLP is 16 and the # of all users is 143
The % of smaller company respondents in NLP is 45.71% and the % of all users is 26.83%

The # of smaller company respondents in Robotics is 7 and the # of all users is 72
The % of smaller company respondents in Robotics is 20.00% and the % of all users is 13.51%

The # of smaller company respondents in RL is 5 and the # of all users is 81
The % of smaller company respondents in RL is 14.29% and the % of all users is 15.20%

The # of smaller company respondents in Other is 9 and the # of all users is 160
The % of smaller company respondents in Other is 25.71% and the % of all users is 30.02%

-----

The # of smaller company respondents who would prioritize Compute is 4 and the # of all users is 8

## Cloud Computing

In [None]:
academics = version[version.Sector == "Academia"]
cloud = academics[academics.CloudUser == 1]
cloud_only = cloud[cloud.OnPremise == 0]
print(f"There are {len(academics)} academics.")
print(f"There are {len(cloud)} academics who use cloud computing.")
print(f"There are {len(cloud_only)} academics who only use cloud computing.")
print("\n-----\n")
for field in ["CV", "NLP", "Robotics", "RL", "Other"]:
  print(f"The # of cloud respondents in {field} is {len(cloud[cloud[field] == 1])} and the # of cloud only respondents is {len(cloud_only[cloud_only[field] == 1])} and the number of academic respondents is {len(academics[academics[field] == 1])}")
  print(f"The % of cloud respondents in {field} is {(len(cloud[cloud[field] == 1])/len(cloud))*100:.2f}% and the % of cloud only respondents is {(len(cloud_only[cloud_only[field] == 1])/len(cloud_only))*100:.2f}% and the % of academic respondents is {(len(academics[academics[field] == 1])/len(academics))*100:.2f}%")
  print()
print("-----\n")
for priority in ["Compute", "Talent", "Data"]:
  print(f"The # of cloud respondents who would prioritize {priority} is {len(cloud[cloud.PrioritySimplified == priority])} and the # of cloud only respondents is {len(cloud_only[cloud_only.PrioritySimplified == priority])} and the # of academic respondents is {len(academics[academics.PrioritySimplified == priority])}")
  print(f"The % of cloud respondents who would  prioritize {priority} is {(len(cloud[cloud.PrioritySimplified == priority])/len(cloud))*100:.2f}% and the % of cloud only respondents is {(len(cloud_only[cloud_only.PrioritySimplified == priority])/len(cloud_only))*100:.2f}% and the % of academic respondents is {(len(academics[academics.PrioritySimplified == priority])/len(academics))*100:.2f}%")
  print()
print("-----\n")
for i, concern_level in enumerate(["not at all concerned", "slightly_concerned", "somewhat concerned", "moderately concerned", "extremely concerned"]):
  print(f"The # of cloud respondents whose concern level is {concern_level.replace('_', ' ')} is {len(cloud[cloud.ContributionConcern == i])} and the # of cloud only respondents is {len(cloud_only[cloud_only.ContributionConcern == i])} and the # of academic respondents is {len(academics[academics.ContributionConcern == i])}")
  print(f"The % of cloud respondents whose concern level is {concern_level.replace('_', ' ')} is {(len(cloud[cloud.ContributionConcern == i])/len(cloud))*100:.2f}% and the % of cloud only respondents is {(len(cloud_only[cloud_only.ContributionConcern == i])/len(cloud_only))*100:.2f}% and the % of academic respondents is {(len(academics[academics.ContributionConcern == i])/len(academics))*100:.2f}%")
  print()

There are 274 academics.
There are 138 academics who use cloud computing.
There are 40 academics who only use cloud computing.

-----

The # of cloud respondents in CV is 41 and the # of cloud only respondents is 14 and the number of academic respondents is 95
The % of cloud respondents in CV is 29.71% and the % of cloud only respondents is 35.00% and the % of academic respondents is 34.67%

The # of cloud respondents in NLP is 44 and the # of cloud only respondents is 16 and the number of academic respondents is 83
The % of cloud respondents in NLP is 31.88% and the % of cloud only respondents is 40.00% and the % of academic respondents is 30.29%

The # of cloud respondents in Robotics is 27 and the # of cloud only respondents is 2 and the number of academic respondents is 58
The % of cloud respondents in Robotics is 19.57% and the % of cloud only respondents is 5.00% and the % of academic respondents is 21.17%

The # of cloud respondents in RL is 31 and the # of cloud only respondent