In [1]:
import csv
import os

import requests
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# YouTube Data API key. Read from the YOUTUBE_API_KEY environment variable so
# the real key never has to be saved inside the notebook; the placeholder is
# kept as the fallback value when the variable is not set.
API_KEY = os.environ.get("YOUTUBE_API_KEY", 'XXXXXXXXX')

In [2]:
def get_technology_video_ids(tags, max_results=1):
    """Collect YouTube video IDs matching groups of search tags.

    The tags are split into groups of at most 10. Each group is sent as one
    space-joined search query restricted to videos in category "28", and
    result pages are followed until the group has yielded ``max_results`` IDs
    or the API reports no further page.

    Args:
        tags: List of strings to search for.
        max_results: Maximum number of video IDs to collect per tag group, so
            the returned list can hold up to ``max_results`` times the number
            of groups.

    Returns:
        A list of video ID strings in the order the API returned them. An
        HTTP error is printed and ends the current group only; IDs gathered
        before the error are kept and the remaining groups are still tried.
    """
    if not tags or max_results <= 0:
        return []

    youtube = build("youtube", "v3", developerKey=API_KEY, cache_discovery=False)
    all_video_ids = []

    # Split the tags into sub-lists of at most 10 tags.
    tag_groups = [tags[i:i + 10] for i in range(0, len(tags), 10)]

    for tag_group in tag_groups:
        query = " ".join(tag_group)
        group_video_ids = []
        # A page token is only valid for the query that produced it, so it
        # must start from None for every group.
        next_page_token = None

        while len(group_video_ids) < max_results:
            remaining = max_results - len(group_video_ids)
            try:
                search_response = youtube.search().list(
                    part="snippet",
                    maxResults=min(remaining, 50),  # at most 50 results per request
                    type="video",
                    videoCategoryId="28",  # Technology category
                    q=query,
                    pageToken=next_page_token,
                ).execute()
            except HttpError as err:
                # The error text embeds the request URL, which carries the
                # API key as a query parameter: mask it before printing.
                message = str(err)
                if API_KEY:
                    message = message.replace(API_KEY, "***")
                print(f"Erreur lors de la récupération des vidéos : {message}")
                break

            page_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
            group_video_ids.extend(page_ids[:remaining])

            next_page_token = search_response.get('nextPageToken')
            if not next_page_token:
                break

        all_video_ids.extend(group_video_ids)

    return all_video_ids

def get_video_info(video_id, timeout=10):
    """Fetch the snippet and statistics of one YouTube video by its ID.

    Args:
        video_id: The video identifier.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the response's ``items`` list (a dict), or None
        when the request fails, the status code is not 200, the body is not
        valid JSON, or the response contains no item for this ID.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "id": video_id,
        "part": "snippet,statistics",
        "key": API_KEY
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as err:
        # Only the exception type is printed: the exception text can include
        # the request URL, and the URL carries the API key.
        print(f"Erreur pour l'ID vidéo {video_id}: {type(err).__name__}")
        return None

    if response.status_code != 200:
        print(f"Erreur pour l'ID vidéo {video_id}: {response.status_code}")
        return None

    try:
        items = response.json().get('items', [])
    except ValueError:
        items = []

    # A 200 response may still carry an empty item list; indexing [0] on it
    # would raise IndexError.
    if not items:
        print(f"Aucune vidéo trouvée pour l'ID {video_id}")
        return None
    return items[0]

# Example usage: the search terms, later sent to the search in groups of 10.
# The literal repeats many entries (e.g. "Python", "Docker", "Kubernetes",
# "Web3"). dict.fromkeys() drops the repeats while keeping first-seen order,
# so no tag is searched twice and fewer groups (hence fewer search requests)
# are needed.
tags = list(dict.fromkeys([
    "Cloud Computing", "Kubernetes", "Docker", "DevOps", "CI/CD", "GitHub Actions", "Jenkins", "Terraform",
    "Ansible", "CloudFormation", "AWS", "Microsoft Azure", "Google Cloud Platform", "Virtual Machines",
    "Microservices", "Serverless", "Edge Computing", "SaaS", "PaaS", "IaaS", "Cybersecurity", "Network Security",
    "Firewall", "Data Encryption", "Penetration Testing", "SIEM", "Zero Trust", "Threat Detection",
    "Incident Response", "Ethical Hacking", "Machine Learning", "Deep Learning", "Natural Language Processing",
    "Computer Vision", "Neural Networks", "AI Ethics", "Python", "TensorFlow", "PyTorch", "Data Science",
    "Big Data", "Apache Hadoop", "Apache Spark", "SQL", "NoSQL", "MongoDB", "Cassandra", "Elasticsearch",
    "Redis", "PostgreSQL", "MySQL", "Data Engineering", "ETL", "Data Pipelines", "Apache Kafka",
    "Apache Flink", "Stream Processing", "Business Intelligence", "Tableau", "Power BI", "Data Visualization",
    "Data Warehouse", "Snowflake", "Google BigQuery", "Amazon Redshift", "Data Governance", "Data Privacy",
    "GDPR Compliance", "Blockchain", "Smart Contracts", "Web3", "NFTs", "DeFi", "Cryptocurrency",
    "Ethereum", "Bitcoin", "Rust", "Go", "Swift", "JavaScript", "React", "Angular", "Vue.js", "TypeScript",
    "Web Development", "Frontend Development", "Backend Development", "Full-Stack Development", "API Development",
    "REST API", "GraphQL", "WebSockets", "Automation Testing", "Selenium", "Apache JMeter", "Load Testing",
    "Performance Monitoring", "New Relic", "Observability",
    "Data Analytics", "Data Mining", "Predictive Analytics", "Data Management", "ETL Tools", "Data Cleansing",
    "Data Quality", "Data Modeling", "Data Integration", "Data Wrangling", "Data Blending", "Data Transformation",
    "Data Lineage", "Data Migration", "Data Federation", "Data Replication", "Data Mart", "Master Data Management",
    "Metadata Management", "Data Lake", "Data Lakehouse", "OLAP", "OLTP", "Data Catalog", "Data Sampling",
    "Data Extraction", "Data Ingestion", "Data Retention", "Database Optimization", "Database Tuning",
    "Database Clustering", "Database Partitioning", "ACID Transactions", "CAP Theorem", "BigQuery ML",
    "Redshift Spectrum", "Google Data Studio", "Databricks", "Azure Data Factory", "Informatica",
    "Talend", "Fivetran", "Stitch Data", "Pentaho", "Airflow", "dbt", "Looker", "Metabase",
    "QuickSight", "QlikView", "QlikSense", "Chartio", "Mode Analytics", "Sigma Computing", "Sisense",
    "DataRobot", "Alteryx", "RapidMiner", "SPSS", "SAS", "MicroStrategy", "TIBCO Spotfire",
    "KNIME", "SQLAlchemy", "Apache Superset", "Grafana", "Prometheus", "Elastic Stack",
    "Splunk", "Machine Learning Operations (MLOps)", "Real-time Analytics", "Event Stream Processing",
    "Business Rules Management", "Business Process Automation", "DataOps", "Feature Engineering",
    "Dimensional Modeling", "Fact Tables", "Star Schema", "Snowflake Schema", "Data Dictionary",
    "Relational Databases", "Columnar Databases", "Key-Value Stores", "Document Databases",
    "Graph Databases", "Time-Series Databases", "Object Databases", "Data Aggregation", "Batch Processing",
    "Real-time Data Processing", "Data Sharding", "Data Tokenization", "Encryption-at-Rest", "Self-Service Analytics",
    "Interactive Dashboards", "KPI Tracking", "Embedded Analytics", "Data Enrichment", "Data Privacy Shield",
    "Multi-cloud Analytics", "Data Augmentation", "Query Optimization", "Workflows", "SQL Optimization",
    "Distributed Computing", "High-Performance Analytics", "Data Cubes", "JSON Databases", "Data Mapping",
    "Python", "Java", "JavaScript", "C++", "R", "SQL", "Scala", "Go", "Kotlin", "Swift",
    "Rust", "PHP", "TypeScript", "HTML5", "CSS3", "Node.js", "Docker", "Kubernetes", "Terraform",
    "Ansible", "Jenkins", "Git", "GitHub", "GitLab", "AWS (Amazon Web Services)", "Microsoft Azure",
    "Google Cloud Platform", "Alibaba Cloud", "IBM Cloud", "OpenStack", "Apache Spark", "Apache Hadoop",
    "Apache Kafka", "Apache Flink", "TensorFlow", "PyTorch", "Keras", "scikit-learn", "OpenCV",
    "NLTK (Natural Language Toolkit)", "Pandas", "NumPy", "Matplotlib", "Seaborn", "Jupyter Notebook",
    "Apache Cassandra", "MongoDB", "PostgreSQL", "MySQL", "Elasticsearch", "Redis", "InfluxDB",
    "Oracle Database", "Microsoft SQL Server", "Snowflake", "BigQuery", "Apache Superset", "Tableau",
    "Power BI", "QlikSense", "Grafana", "Prometheus", "Looker", "Databricks", "DataRobot", "Airflow",
    "dbt (Data Build Tool)", "Talend", "Fivetran", "Stitch Data", "Informatica", "Apache NiFi", "Pentaho",
    "Hadoop Ecosystem", "Dataiku", "KNIME", "Alteryx", "RapidMiner", "H2O.ai", "IBM Watson",
    "Microsoft Cognitive Services", "OpenAI", "DeepMind", "Reinforcement Learning", "Supervised Learning",
    "Unsupervised Learning", "Natural Language Processing", "Computer Vision", "Speech Recognition",
    "Chatbots", "Deep Learning", "Machine Learning Operations (MLOps)", "AutoML", "AI Ethics",
    "Blockchain", "Ethereum", "Bitcoin", "Smart Contracts", "Web3",
]))

In [3]:
# Fetch the video IDs.
# Heads-up (translated from the original note): the number of results is
# max_results * 16, i.e. max_results=10 gave 160 results.
video_ids = get_technology_video_ids(tags=tags, max_results=10)
print(video_ids)

Erreur lors de la récupération des vidéos : <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&maxResults=10&type=video&videoCategoryId=28&q=Cloud+Computing+Kubernetes+Docker+DevOps+CI%2FCD+GitHub+Actions+Jenkins+Terraform+Ansible+CloudFormation&key=REDACTED&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
Erreur lors de la récupération des vidéos : <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&maxResults=10&type=video&videoCategoryId=28&q=AWS+Microsoft+Azure+Google+Cloud+Platform+Virtual+Machines+Microservices+Serverless+Edge+Computing+SaaS+PaaS+IaaS&key=REDACTED

Erreur lors de la récupération des vidéos : <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&maxResults=10&type=video&videoCategoryId=28&q=Chartio+Mode+Analytics+Sigma+Computing+Sisense+DataRobot+Alteryx+RapidMiner+SPSS+SAS+MicroStrategy&key=REDACTED&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
Erreur lors de la récupération des vidéos : <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&maxResults=10&type=video&videoCategoryId=28&q=TIBCO+Spotfire+KNIME+SQLAlchemy+Apache+Superset+Grafana+Prometheus+Elastic+Stack+Splunk+Machine+Learning+Operations+%28MLOps%29+Real-time+Analytics&key

['1PAy6d16ADQ', 'Ca57qaaqExQ', 'eAcxe3MzcVk', 'KOjdOpkSxwY', 'D7DB7JBlCiI', 'x5PrfleL9H4', 'MxrUR5wXk_A', 'PQV5qtS1E1U', 'T_8ZwnhBEBE', 'icKRNXS83V4', 'ORMx45xqWkA', 'nOrqA03MLDg', 'R3jebOskbO4', 'l1WERyHFc6s', 'bpCaGeHF6sA', 'XgWTfaFiR7o', 'YU_vMvtMjzo', 'nh0z3mEXteQ', 'G5RY_SUJih4', 'IMiaUvteYNM']


In [4]:
# Write one CSV row per video whose details could be fetched.
with open('video_info.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    header = [
        'video_id', 'title', 'publishedAt', 'channelId', 'description',
        'view_count', 'like_count', 'favorite_count', 'comment_count',
        'tags', 'defaultLanguage',
    ]
    writer.writerow(header)

    for video_id in video_ids:
        video_info = get_video_info(video_id)
        # get_video_info returns None on failure: skip those videos.
        if not video_info:
            continue

        # Mandatory snippet fields (a missing key raises KeyError).
        snippet = video_info['snippet']
        row = [video_id]
        row += [snippet[field] for field in ('title', 'publishedAt', 'channelId', 'description')]

        # Counters are optional in the response; 'N/A' marks an absent one.
        stats = video_info['statistics']
        row += [
            stats.get(counter, 'N/A')
            for counter in ('viewCount', 'likeCount', 'favoriteCount', 'commentCount')
        ]

        # Optional snippet fields, same 'N/A' placeholder.
        row.append(snippet.get('tags', 'N/A'))
        row.append(snippet.get('defaultLanguage', 'N/A'))

        writer.writerow(row)

Erreur pour l'ID vidéo eAcxe3MzcVk: 403
Erreur pour l'ID vidéo x5PrfleL9H4: 403
Erreur pour l'ID vidéo PQV5qtS1E1U: 403
Erreur pour l'ID vidéo T_8ZwnhBEBE: 403
Erreur pour l'ID vidéo nOrqA03MLDg: 403
Erreur pour l'ID vidéo l1WERyHFc6s: 403
Erreur pour l'ID vidéo bpCaGeHF6sA: 403
Erreur pour l'ID vidéo XgWTfaFiR7o: 403
Erreur pour l'ID vidéo YU_vMvtMjzo: 403
Erreur pour l'ID vidéo IMiaUvteYNM: 403


In [5]:
# tag_groups = [tags[i:i + 10] for i in range(0, len(tags), 10)]
# print(tag_groups)