In [9]:
from pymongo import MongoClient

In [10]:
client = MongoClient()

In [11]:
for doc in \
    client.vectordb.company_key_data.find({
        'topic': 'G',
        'metric': True
    }):
    break

In [12]:
client.vectordb.company_key_data.distinct('company')

['Advanced Micro Devices, Inc.',
 'Apple Inc.',
 'Intel Corporation',
 'Microsoft Corporation',
 'NVIDIA Corporation',
 'Qualcomm Incorporated']

In [21]:
pipeline = [
    {
        '$match': {
            'tags': {'$regex': 'diversity', '$options': 'i'},  # Match documents based on query
            'metric': True,
            'company': 'NVIDIA Corporation',
            'topic': 'G'
        }
    },
    {
        '$addFields': {
            'descriptionLength': {'$strLenCP': '$description'}  # Add a field for the length of the description
        }
    },
    {
        '$sort': {'descriptionLength': 1}  # Sort documents by the length of the description in ascending order
    },
    {
        '$limit': 2  # Limit the result to
    }
]

# Execute the aggregation pipeline
results = client.vectordb.company_key_data.aggregate(pipeline)

# Print each document found
for document in results:
    print(document['value'], document['description'])

23% Percentage of Board members who are women.
12.0% Percentage of leaders who are women in FY22.


In [86]:
def get_metrics(company, topic, phrase):
    pipeline = [
        {
            '$match': {
                'tags': {'$regex': phrase, '$options': 'i'},  # Match documents based on query
                'metric': True,
                'company': company,
                'topic': topic
            }
        },
        {
            '$addFields': {
                'descriptionLength': {'$strLenCP': '$description'}  # Add a field for the length of the description
            }
        },
        {
            '$sort': {'descriptionLength': 1}  # Sort documents by the length of the description in ascending order
        },
        {
            '$limit': 2  # Limit the result to
        }
    ]

    # Execute the aggregation pipeline
    results = client.vectordb.company_key_data.aggregate(pipeline)

    # Print each document found
    return_res = []
    for document in results:
        return_res.append({'value': document['value'], 'description': document['description']})
    return return_res

## Environmental

In [87]:
# Energy efficiency
get_metrics('NVIDIA Corporation', 'E', 'efficiency')

[{'value': '3,492 gigajoules',
  'description': 'Reductions in energy consumption'},
 {'value': '20 percent',
  'description': 'Reduction in emissions per employee since FY14.'}]

In [88]:
# renewable energy
get_metrics('NVIDIA Corporation', 'E', 'renewable')

[{'value': '154,160 MWh',
  'description': 'Renewable electricity purchased in FY22.'},
 {'value': '38%', 'description': 'Renewable electricity percentage in FY22.'}]

In [73]:
# Emissions
get_metrics('NVIDIA Corporation', 'E', 'emissions')

[2,074,450] Scope 3 (tCO2e) in FY21.
[91,740 tCO2e] Scope 1 and 2 emissions.
[0.19 metric tons] Metric tons of VOC emitted.
[0.16 metric tons] Metric tons of SOX emitted.
[119] Gasoline emissions in FY19.
[102,026] Capital goods (tCO2e) in FY21.
[2 mtCO2e] Metric tons of methane emitted.
[118] Refrigerants emissions in FY19.
[3,068] Business travel (tCO2e) in FY21.
[14,764] Employee commuting (tCO2e) in FY21.
[0.17 metric tons] Metric tons of particulates emitted.
[2 mtCO2e] Metric tons of nitrous oxide emitted.
[3,327 mtCO2e] Metric tons of carbon dioxide emitted.
[0.51 metric tons] Metric tons of carbon monoxide emitted.
[12,357] Upstream leased assets (tCO2e) in FY21.
[91,740] Scope 1 and 2 emissions (tCO2e) in FY21.
[105,621] Scope 2, location-based (tCO2e) in FY21.
[2,379] Stationary natural gas emissions in FY19.
[1.4 metric tons] Metric tons of hydrofluorocarbon emitted.
[2,671] Total global GHG emissions (tCO2e) in FY19.


In [79]:
# Waste Management
get_metrics('NVIDIA Corporation', 'E', 'waste')

[56%] Landfill diversion rate in FY22.
[80 percent] Annual goal for landfill diversion.
[411] Metric tons of waste landfilled in FY22.
[78 percent] Landfill diversion rate achieved in FY20.
[577] Waste generated in operations (tCO2e) in FY21.
[224] Metric tons of general waste landfilled in FY22.
[2] Metric tons of hazardous waste recycled in FY22.
[75] Metric tons of electronic waste recycled in FY22.
[80 percent] Annual goal for landfill diversion at headquarters since FY08.
[80 percent] Landfill diversion rate for Silicon Valley headquarters campus.
[80%] Landfill diversion rate for Silicon Valley headquarters campus.
[186] Metric tons of construction/demolition waste landfilled in FY22.
[80%] Waste-to-landfill diversion rate at Silicon Valley headquarters.
[80%] Waste-to-landfill diversion rate at Silicon Valley headquarters.
[80 percent] Waste-to-landfill diversion rate or greater each year at our Silicon Valley headquarters.
[56%] Percentage of FY22 waste diverted from landfill, 

In [74]:
# water
get_metrics('NVIDIA Corporation', 'E', 'water')

[A-] CDP score for climate change and water security responses.
[42 percent] Reduction in domestic water demand in new headquarters building.
[42 percent] Reduction in domestic water demand in new headquarters building.
[91 percent] Reduction in potable water use for sewage conveyance in new headquarters building.
[91 percent] Reduction in potable water use for sewage conveyance in new headquarters building.
[carbon emissions and water consumption per product and per financial outlay] Metrics calculated by NVIDIA to determine the environmental impact of its products and financial outlay.
[46 percent] Less potable water used in new headquarters building compared to other campus buildings of a comparable size.
[Thousand cubic meters (m³)] Total water withdrawn by NVIDIA, a fabless semiconductor company that does not have its own manufacturing facilities.
[Total water withdrawn] Total amount of water withdrawn, including percentage of water withdrawn in regions with High or Extremely High

### Social

In [53]:
# Gender Diversity
get_metrics('NVIDIA Corporation', 'S', 'gender')

[5.7%] Turnover rate for men in FY19.
[5.6%] Turnover rate for women in FY19.
[80.9] Percentage of men in the workforce.
[18.9] Percentage of women in the workforce.
[80.4%] Percentage of male employees in FY22.
[19.0%] Percentage of female employees in FY22.
[80.5%] Percentage of men in the workforce in FY20.
[11.0%] Percentage of leaders held by women in FY19.
[16.5%] Percentage of managers held by women in FY19.
[18.9%] Percentage of women in the workforce in FY20.
[20.4%] Percentage of female employees in the company.
[18.0%] Percentage of outside directors held by women in FY19.
[19.1%] Percentage of new hires globally who are women in FY22.
[40.0%] Percentage of executive officers held by women in FY19.
[0.94%] Percentage of employees who did not declare their gender.
[16.8%] Percentage of women promoted in FY22, compared to 15.7% of men.
[0.21%] Percentage of employees who did not declare their gender in FY19.
[0.6%] Percentage of employees who did not declare their gender in FY

In [47]:
# Diversity
get_metrics('NVIDIA Corporation', 'S', 'diversity')

[265] Number of women hired globally.
[29%] Percentage of workforce from China.
[19%] Percentage of workforce from Europe.
[8%] Percentage of workforce from Europe.
[25%] Percentage of workforce from Taiwan.
[13%] Percentage of workforce from Europe.
[10.5%] Percentage of leaders that are women.
[80.4%] Percentage of male employees in FY22.
[11.3%] Percentage of leaders that are women.
[15.9%] Percentage of managers that are women.
[38.70%] Percentage of White employees in FY19.
[16.2%] Percentage of managers that are women.
[19.7%] Percentage of new hires that are women.
[19.0%] Percentage of female employees in FY22.
[6.7%] Turnover rate for men in the past year.
[6.9%] Turnover rate for women in the past year.
[40 percent] Increase in female speakers at GTC in 2019.
[5%] Percentage of workforce from United States.
[20%] Percentage of workforce from United States.
[5%] Percentage of workforce from United States.


In [67]:
# Inclusion
get_metrics('NVIDIA Corporation', 'S', 'inclusion')

[1.4%] Percentage of veterans in the U.S. in FY22.
[2.1%] Percentage of employees with disabilities in the U.S. in FY22.
[100%] Percentage of diverse hires tracked through the recruiting funnel.
[21,000] Number of people reached through developer conferences in seven countries.
[87 percent] Percentage of employees who say that our teams value diverse perspectives.
[tripled] Number of Black and African American employees increased in the past year.
[507] Number of promotions given to members of minority groups in the U.S. in FY19.
[50 percent] Increase in female attendance at the GPU Technology Conference (GTC) in 2019.
[6] Number of years in a row that GTC has included events specifically for women.
[tripled] Increase in Black and African American employees in the U.S. in the last year.
[2,000] Number of Mellanox employees welcomed from Israel and the Palestinian Authority.
[24] Number of underrepresented communities in technology that have been hired in FY21.
[50%] Percentage of uncon

In [66]:
# Education
get_metrics('NVIDIA Corporation', 'S', 'education')

[tens of thousands] Number of developers educated each year.
[1,000] Number of courses using GPUs being taught in universities worldwide, supported by NVIDIA.
[10,000 more K-12, community college, and HBCU students] Number of students who will have access to AI curriculum through the AI Education Project investment.
[1.3 million] Amount donated by NVIDIA and its employees to education efforts in FY19, reaching nearly 80,000 youth.
[several hundred] Number of students from the Santa Clara Unified School District engaged through the Techsplorer program.
[50 women] Number of women given access to deep learning courses through the Women's Early Career Accelerator program.
[10,000] Number of students reached by NVIDIA's Techsplorer kits through the AI Family Challenge initiative since its launch in FY18.
[Grants for Developer Kits] Number of Jetson Nano developer kits given to educators in colleges, schools, and nonprofit groups as part of a grant program.
[9] Number of organizations receiv

### Governance

In [77]:
# Compliance
get_metrics('NVIDIA Corporation', 'G', 'compliance')

[13] Number of suppliers engaged on their CAPs from the FY21 auditing season.
[100%] Percentage of strategic suppliers that completed self-assessment questionnaires (SAQs).
[100%] Percentage of NVIDIA's SAQ review that determined no high-risk suppliers in the top spending area.
[60%] Anti-bribery policy update in process and anti-trust policy being finalized for roll out during FY20.
[100 percent] Percentage of employees who complete code of conduct training upon hire and every two years thereafter.
[0%] Percentage of suppliers that required corrective action plans (CAPs) due to lack of high-risk suppliers.
[100%] Percentage of employees who had completed code of conduct, ethics, and sexual harassment training as of March 2022.
[100%] Percentage of strategic suppliers that completed self-assessment questionnaires (SAQ) in the top 80% of NVIDIA spending.
[100%] Percentage of processing facilities in supply chain that are compliant with Responsible Minerals Assurance Process (RMAP).
[100

In [78]:
# Supply Chain
get_metrics('NVIDIA Corporation', 'G', 'supply')

[100%] Percentage of processing facilities in supply chain that are compliant with Responsible Minerals Assurance Process (RMAP).
[100%] Percentage of suppliers ranked for compliance with RBA Code of Conduct, leveraging RBA membership and using RBA-Online platform.


In [26]:
pipeline = [
    {"$unwind": "$tags"},
    {"$group": {"_id": "$tags"}},
    {"$group": {"_id": None, "uniqueValues": {"$push": "$_id"}}},  # Collects all unique values into a single array
    {"$project": {"_id": 0, "uniqueValues": 1}}  # Optional: Formats the output to only show the array of unique values
]

unique_values = client.vectordb.company_key_data.aggregate(pipeline)

# Assuming you want to print the array of unique values
for value in unique_values:
    for tag in value['uniqueValues']:
        print(tag)

environmental data
Circular Centers
sustainable energy
sustainable IT practices
carbon avoidance
compensation committee
supplier recognition
data management
inclusive culture
management review
instructional design
ESG data summary
supplier management
environmental
SBTI
processors
IT
supercomputing
toxicology
speed
environmental justice
ethics
share repurchase
justice
STEM
CSR recognition
financial data
material challenges
cloud migration
CSR
manufacturing sites
biometrics
social goals
child labor
rewards and recognition
community development
responsible minerals
employee resource groups
chemical substances
gpu
server
partnerships
sustainable solutions
customer satisfaction
gender imbalance
supplier audit
economic opportunity
iOS
sustainable development
inclusive
public policy engagement
messaging
2030
disability inclusion
renewable integration
predictive modeling
GPU-powered
environmental regulations
climate science
engagement
waste management
csr report
community impact
programming
ro

In [75]:
pipeline = [
    {
        '$match': {
            'metric': True,
            'topic': 'G'
        }
    },
    {"$unwind": "$tags"},
    {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
    # {"$group": {"_id": None, "uniqueValues": {"$push": "$_id"}}},  # Collects all unique values into a single array
    # {"$project": {"_id": 0, "uniqueValues": 1}}  # Optional: Formats the output to only show the array of unique values
    {"$sort": {"count": -1}}  # Optional: sorts the results by count in descending order
]

unique_values = client.vectordb.company_key_data.aggregate(pipeline)

# Assuming you want to print the array of unique values
for value in unique_values:
    print(value)

{'_id': 'compliance', 'count': 31}
{'_id': 'supply chain', 'count': 26}
{'_id': 'supplier', 'count': 24}
{'_id': 'training', 'count': 20}
{'_id': 'risk management', 'count': 20}
{'_id': 'diversity', 'count': 14}
{'_id': 'partnerships', 'count': 13}
{'_id': 'governance', 'count': 13}
{'_id': 'sustainability', 'count': 13}
{'_id': 'audit', 'count': 12}
{'_id': 'cybersecurity', 'count': 11}
{'_id': 'investment', 'count': 11}
{'_id': 'philanthropy', 'count': 11}
{'_id': 'financial performance', 'count': 10}
{'_id': 'innovation', 'count': 9}
{'_id': 'ethics', 'count': 8}
{'_id': 'board of directors', 'count': 8}
{'_id': 'security', 'count': 8}
{'_id': 'employee engagement', 'count': 6}
{'_id': 'climate change', 'count': 6}
{'_id': 'board diversity', 'count': 6}
{'_id': 'performance', 'count': 6}
{'_id': 'finance', 'count': 6}
{'_id': 'employee development', 'count': 5}
{'_id': 'modern slavery', 'count': 5}
{'_id': 'privacy', 'count': 5}
{'_id': 'leadership', 'count': 5}
{'_id': 'award', 'co