In [None]:
# 必要モジュールのインポート
import os
from dotenv import load_dotenv

# .envファイルの内容を読み込見込む
load_dotenv()

# スター数上位100のjavascriptリポジトリのURLを取得

In [1]:
import requests
import json

def fetch_top_github_repos():
    url = "https://api.github.com/search/repositories"
    headers = {
        "Accept": "application/vnd.github+json",
        "User-Agent": "Top-Repo-Fetcher"
    }
    params = {
        "q": "stars:>1 language:JavaScript",  # JavaScriptでスター数が1以上のリポジトリを検索
        "sort": "stars",   # スター数でソート
        "order": "desc",  # 降順
        "per_page": 100     # 1ページの最大件数
    }

    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json()
        
        # リポジトリのURLを抽出
        top_repos = [repo["html_url"] for repo in data.get("items", [])]
        
        # JSON形式で保存
        with open("top100_javascript_repositories.json", "w", encoding="utf-8") as f:
            json.dump(top_repos, f, ensure_ascii=False, indent=4)

        print("トップ100のJavaScriptリポジトリのURLが 'top100_javascript_repositories.json' に保存されました。")

    except requests.exceptions.RequestException as e:
        print(f"APIリクエストに失敗しました: {e}")

if __name__ == "__main__":
    fetch_top_github_repos()


トップ100のJavaScriptリポジトリのURLが 'top100_javascript_repositories.json' に保存されました。


# libraries.ioからキーワードを取得したい

In [1]:
import json
import requests
import time

# APIトークンを設定（ここに取得したトークンを記入）
API_TOKEN = os.environ["LIBRALIESIO_API_KEY"]

# 入力ファイルと出力ファイルのパス
INPUT_FILE = "top100_javascript_repositories.json"
OUTPUT_FILE = "keywords.json"

# ベースURL
BASE_URL = "https://libraries.io/api"

def get_library_keywords(github_url):
    """
    指定されたGitHubリポジトリURLに一致するライブラリのキーワードを取得します。
    """
    try:
        # APIリクエストURLを作成
        url = f"{BASE_URL}/search"
        params = {
            "q": github_url,
            "api_key": API_TOKEN
        }
        
        # APIにリクエストを送信
        response = requests.get(url, params=params)
        response.raise_for_status()
        
        # APIのレスポンスを解析
        results = response.json()
        if results:
            # 最初の結果のキーワードを返す
            return results[0].get("keywords", [])
        else:
            return []
    except Exception as e:
        print(f"Error fetching keywords for {github_url}: {e}")
        return []

def main():
    # 入力ファイルを読み込み
    with open(INPUT_FILE, "r") as infile:
        github_urls = json.load(infile)
    
    # 結果を格納する辞書
    keywords_dict = {}
    
    for github_url in github_urls:
        print(f"Fetching keywords for {github_url}...")
        keywords = get_library_keywords(github_url)
        keywords_dict[github_url] = keywords
        time.sleep(1)
    
    # 結果を出力ファイルに保存
    with open(OUTPUT_FILE, "w") as outfile:
        json.dump(keywords_dict, outfile, indent=4)
    
    print(f"Keywords saved to {OUTPUT_FILE}")

if __name__ == "__main__":
    main()


Fetching keywords for https://github.com/facebook/react...
Fetching keywords for https://github.com/trekhleb/javascript-algorithms...
Fetching keywords for https://github.com/twbs/bootstrap...
Fetching keywords for https://github.com/airbnb/javascript...
Fetching keywords for https://github.com/vercel/next.js...
Fetching keywords for https://github.com/Chalarangelo/30-seconds-of-code...
Fetching keywords for https://github.com/nodejs/node...
Fetching keywords for https://github.com/axios/axios...
Fetching keywords for https://github.com/mrdoob/three.js...
Fetching keywords for https://github.com/facebook/create-react-app...
Fetching keywords for https://github.com/ryanmcdermott/clean-code-javascript...
Fetching keywords for https://github.com/iptv-org/iptv...
Fetching keywords for https://github.com/microsoft/Web-Dev-For-Beginners...
Fetching keywords for https://github.com/sveltejs/svelte...
Fetching keywords for https://github.com/jaywcjlove/awesome-mac...
Fetching keywords for https

# npmのキーワード（keywords=serverで検索したときの）を取りたい

In [2]:
import requests
from bs4 import BeautifulSoup
import time

def get_list_items(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        ul_elements = soup.find_all('ul', class_='cf33f2b9 w-90 list mh0 mv2 pa0 truncate')

        all_items = []
        for ul in ul_elements[:10]:  # 最初の10個の<ul>に対して処理
            li_elements = ul.find_all('li')  # <li>要素をすべて取得
            item_texts = [li.get_text(strip=True) for li in li_elements]  # 文字を取得
            all_items.append(item_texts)
            time.sleep(1)
        
        return all_items
    else:
        print("ページの取得に失敗しました。")
        return []

# 使用例
url = 'https://www.npmjs.com/search?page=0&q=keywords%3Aserver&sortBy=dependent_count
'
list_items = get_list_items(url)

# 結果の表示
for index, items in enumerate(list_items):
    print(f"リスト {index + 1}:")
    for item in items:
        print(f" - {item}")


リスト 1:
 - react
 - framework
 - nextjs
 - web
 - server
 - node
 - front-end
 - backend
 - cli
 - vercel
リスト 2:
 - util
 - functional
 - server
 - client
 - browser
リスト 3:
 - mysql
 - client
 - server
リスト 4:
 - task
 - async
 - cli
 - minify
 - uglify
 - build
 - lodash
 - unit
 - test
 - qunit
 - nodeunit
 - server
 - init
 - scaffold
 - View more
リスト 5:
 - static
 - file
 - server
リスト 6:
 - websocket
 - websockets
 - socket
 - networking
 - comet
 - push
 - RFC-6455
 - realtime
 - server
 - client
リスト 7:
 - database
 - mssql
 - sql
 - server
 - msnodesql
 - sqlserver
 - tds
 - node-tds
 - tedious
 - node-sqlserver
 - msnodesqlv8
 - azure
 - node-mssql
リスト 8:
 - model
 - view
 - controller
 - router
 - server
 - client
 - browser
リスト 9:
 - cli
 - command
 - static
 - http
 - https
 - http-server
 - https-server
 - server
リスト 10:
 - validation
 - validate
 - server
 - client


In [None]:
API_TOKEN = os.environ["LIBRALIESIO_API_KEY"]

In [2]:
import requests
import json
import time

# ベースURLの設定
BASE_URL = 'https://libraries.io/api/platforms/npm/projects'

# 出力ファイル名の設定
OUTPUT_FILE = 'top_js_rep_key.json'

# 1回のリクエストで取得するプロジェクト数
PER_PAGE = 50

# APIリクエスト間の待機時間（秒）
REQUEST_DELAY = 1

def fetch_projects(page):
    """指定したページのプロジェクト一覧を取得する"""
    params = {
        'api_key': API_KEY,
        'sort': 'stars',
        'per_page': PER_PAGE,
        'page': page
    }
    response = requests.get(BASE_URL, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f'APIリクエスト失敗: {response.status_code}, {response.text}')

def main():
    all_projects = []
    page = 1

    while len(all_projects) < 100:
        print(f'ページ {page} のデータを取得中...')
        projects = fetch_projects(page)
        if not projects:
            break
        all_projects.extend(projects)
        page += 1
        time.sleep(REQUEST_DELAY)

    # 上位100件に絞る
    top_projects = all_projects[:100]

    # 必要な情報を抽出
    result = []
    for project in top_projects:
        result.append({
            'name': project['name'],
            'repository_url': project['repository_url'],
            'keywords': project.get('keywords', [])
        })

    # JSONファイルに保存
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=4)

    print(f'データを {OUTPUT_FILE} に保存しました。')

if __name__ == '__main__':
    main()



ページ 1 のデータを取得中...


Exception: APIリクエスト失敗: 404, <!DOCTYPE html>
<html lang="en">
<head>
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="msvalidate.01" content="3667D2FEB238C1BF174F3CB1AE0D3C16" />
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" href="/assets/application-54c51e436e998966c20a8e883979835b548519abf02da82fdbe6c604f2205a16.css" media="all" data-turbolinks-track="true" />
  <title>Libraries.io - security & maintenance data for open source software</title>
  <meta name="description" content="Discover open source packages, modules and frameworks you can use in your code.">
  <meta name="csrf-param" content="authenticity_token" />
<meta name="csrf-token" content="PSvue1H7o7yF3es_n5rH7I6sXcUdKsJE966nIgAdBH1WUgv_VCk0wF_9-uhzGil-dqUw1zpqFgdOfn0GeCTnVg" />
  
  <link rel="apple-touch-icon-precomposed" sizes="57x57" href="https://libraries.io/assets/apple-touch-icon-57x57-d2ce1ae09b96b2dcd4548cdab571b2f0f04567fd52bb42acdc158f3e499fb9f2.png" />
  <link rel="apple-touch-icon-precomposed" sizes="114x114" href="https://libraries.io/assets/apple-touch-icon-114x114-0b33f62bf1188d522e2a3af36c193aaeabef9f6dbd01fd3321a3aac20270c3f7.png" />
  <link rel="apple-touch-icon-precomposed" sizes="72x72" href="https://libraries.io/assets/apple-touch-icon-72x72-0d786ee6083b71a6af882b9ae456f6ef055ac33bfaf3c8ae55eac5d38491d742.png" />
  <link rel="apple-touch-icon-precomposed" sizes="144x144" href="https://libraries.io/assets/apple-touch-icon-144x144-e44e66335b196ade90f421758d5d66c4ba3903d891e28431288bae4a76543f91.png" />
  <link rel="apple-touch-icon-precomposed" sizes="60x60" href="https://libraries.io/assets/apple-touch-icon-60x60-bdf27a1057418054b28c679717d461ed052fdfa7cc1930e65200562e9c535d77.png" />
  <link rel="apple-touch-icon-precomposed" sizes="120x120" href="https://libraries.io/assets/apple-touch-icon-120x120-fd2ca061f478b697773e9d1d0144fe1ddf5c5fee5be46c07541271da4bee10fe.png" />
  <link rel="apple-touch-icon-precomposed" sizes="76x76" href="https://libraries.io/assets/apple-touch-icon-76x76-788ab56a7ab652c168cad5fef06ec3fec102768f08cd691a4b51e66e474e8538.png" />
  <link rel="apple-touch-icon-precomposed" sizes="152x152" href="https://libraries.io/assets/apple-touch-icon-152x152-be5ef5b4a68d187f60df08e2415e0ec8553225e5c3be3a7f044279b8a44f8d33.png" />
  <meta name="application-name" content="&nbsp;"/>
  <meta name="msapplication-TileColor" content="#FFFFFF" />
  <meta name="msapplication-TileImage" content="https://libraries.io/assets/mstile-144x144-e44e66335b196ade90f421758d5d66c4ba3903d891e28431288bae4a76543f91.png" />
  <meta name="msapplication-square70x70logo" content="https://libraries.io/assets/mstile-70x70-9d4251daf8e8befa87287d559f146b72bf061e2a6778f1c337b6f59d6ecdc1b6.png" />
  <meta name="msapplication-square150x150logo" content="https://libraries.io/assets/mstile-150x150-2cdc9f4fbbe7573c60d1175e2d56e034e47cb34c53c3dbbae4986e72747f82fb.png" />
  <meta name="msapplication-wide310x150logo" content="https://libraries.io/assets/mstile-310x150-52c90e484d1ec5b2d51fb246d837d844b85486a5ac718393e9c09b6555ab373c.png" />
  <meta name="msapplication-square310x310logo" content="https://libraries.io/assets/mstile-310x310-3547800cb3aa250b9dcfa0d95ffa675c73580ece85c09edaca209657ad281a34.png" />
  <meta property="fb:admins" content="508462908" />
  <meta name="yandex-verification" content="2a6b144b8bd37026" />
  <meta property="og:type" content="article" />
<meta property="og:site_name" content="Libraries.io" />
<meta property="og:url" content="https://libraries.io" />
<meta property="og:title" content="Libraries.io - security &amp; maintenance data for open source software" />
<meta property="og:description" content="Discover open source libraries, modules and frameworks you can use in your code" />
<meta property="og:image"  content="https://libraries.io/apple-touch-icon-152x152.png" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:site" content="@librariesio" />
<meta name="twitter:title" content="Libraries.io - security &amp; maintenance data for open source software" />
<meta name="twitter:description" content="Discover open source libraries, modules and frameworks you can use in your code" />
<meta name="twitter:image" content="https://libraries.io/apple-touch-icon-152x152.png" />

  <link href="/opensearch.xml" title="Libraries.io" rel="search" type="application/opensearchdescription+xml">
    <script type="text/javascript">
      const amplitudeEnabledForRequest = false
      const amplitudeApiKey = "2605ad38181f3d7322f5e3c71ceddd85"
        const currentUserId = null
        const currentUserEmail = null
    </script>
    <script>
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
        ga('create', 'UA-105624087-2', 'auto');
    </script>
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
    new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
    j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
    'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
    })(window,document,'script','dataLayer','GTM-P7XSSNL');</script>
  <script src="/assets/application-1d8dea1ec31c995c6479cf68737480a4cc63385054dd33a256ac03ba0dd61e01.js" data-turbolinks-track="true"></script>
  <script type="application/ld+json">
    {
      "@context": "http://schema.org",
      "@type": "WebSite",
      "name": "Libraries.io",
      "url": "https://libraries.io/",
      "description": "Discover open source packages, modules and frameworks you can use in your code",
      "license": "https://github.com/librariesio/libraries.io/blob/main/LICENSE.txt",
      "potentialAction": {
        "@type": "SearchAction",
        "target": "https://libraries.io/search?q={search_term_string}",
        "query-input": "required name=search_term_string"
      }
    }
  </script>
  <script type="application/ld+json">
    {
      "@context": "http://schema.org",
      "@type": "Organization",
      "name": "Libraries.io",
      "description": "Discover open source packages, modules and frameworks you can use in your code",
      "url": "https://libraries.io/",
      "logo": "https://libraries.io/assets/apple-touch-icon-152x152-be5ef5b4a68d187f60df08e2415e0ec8553225e5c3be3a7f044279b8a44f8d33.png",
      "email": "support@libraries.io",
      "sameAs": [
        "https://twitter.com/librariesio",
        "https://www.facebook.com/libraries.io",
        "https://github.com/librariesio",
        "https://plus.google.com/101868076054784591044"
      ]
    }
  </script>
</head>
<body>
  



  <div
    class="flash-banner text-center hidden-xs"
    role="alert"
  >
    <strong>
  <a href="https://explore.tidelift.com/2024-tidelift-impact-report?utm_source=read-the-docs&utm_medium=referral&utm_campaign=2024-impact-report"
    style="text-decoration:underline;color: white;">Read now!</a>
    How one org saved $1.1M and reduced OSS risk 💸
</strong>

  </div>

  <div class="navbar navbar-inverse navbar-static-top">
    <div class="container">
      <div class="navbar-header">
        <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar-main">
          <span class="sr-only">Toggle navigation</span>
          <span class="icon-bar"></span>
          <span class="icon-bar"></span>
          <span class="icon-bar"></span>
        </button>
        <a class="navbar-brand" href="/"><img width="180" height="45" src="/assets/logo-f12a3d741e6af01c2ff47be93f3429827ff8986ba389bdf4e703b75060f68a2b.svg" /></a>
      </div>
      <div class="navbar-collapse collapse" id="navbar-main">
          <ul class="nav navbar-nav navbar-left">
            <li class=' '>
              <form class="navbar-form search_form" action="/search" accept-charset="UTF-8" method="get">
  <div class="input-group input-group">
    <input type="text" name="q" id="q" value="" placeholder="Search Packages" class="form-control search-input" size="40" />
      
      
      
      
      <input type="hidden" name="sort" id="sort" value="stars" autocomplete="off" />

    <span class="input-group-btn">
      <button class="btn btn-primary" type="submit">
        <i class="fa fa-search"></i>
      </button>
    </span>

  </div>
</form>
            </li>
          </ul>
        <ul class="nav navbar-nav navbar-right">
            <li>
              <div class="navbar-btn btn-group">
                <button type="button" class="btn btn-primary dropdown-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
                  Login <span class="caret"></span>
                </button>
                <ul class="dropdown-menu">
                  <li>
                    <a href="/login?host_type=github&amp;return_to=https%3A%2F%2Flibraries.io%2Fapi%2Fplatforms%2Fnpm%2Fprojects%3Fapi_key%3D9711c7ece159960b593787c129d93cbb%26sort%3Dstars%26per_page%3D50%26page%3D1">
                      <i class="fa fa-github"></i> GitHub
</a>                  </li>
                  <li>
                    <a href="/login?host_type=gitlab&amp;return_to=https%3A%2F%2Flibraries.io%2Fapi%2Fplatforms%2Fnpm%2Fprojects%3Fapi_key%3D9711c7ece159960b593787c129d93cbb%26sort%3Dstars%26per_page%3D50%26page%3D1">
                      <i class="fa fa-gitlab"></i> GitLab
</a>                  </li>
                  <li>
                    <a href="/login?host_type=bitbucket&amp;return_to=https%3A%2F%2Flibraries.io%2Fapi%2Fplatforms%2Fnpm%2Fprojects%3Fapi_key%3D9711c7ece159960b593787c129d93cbb%26sort%3Dstars%26per_page%3D50%26page%3D1">
                      <i class="fa fa-bitbucket"></i> Bitbucket
</a>                  </li>
                  <li class='divider'></li>
                  <li class="dropdown-header terms">
                    By logging in you accept <br>our <a href='/terms'>terms of service</a> <br>and <a href='/privacy'>privacy policy</a>
                  </li>
                </ul>
              </div>
            </li>
        </ul>
      </div>
    </div>
  </div>

  <div class="container">
    <div class="row">
  <div class="col-sm-8">
    <h2>We can't find whatever it was you were looking for.</h2>
    <p>In truth, we may never have had what you are looking for, it could have been removed or never existed.</p>
    <p>
      Email us at <a href="mailto:support@libraries.io">support@libraries.io</a> if this is ruining your day.
    </p>
  </div>
  <div class="col-sm-4">

  </div>
</div>

  </div>
  <footer data-ga-tracked-el='footer'>
  <div class="container">
    <div class='row'>
      <div class="col-xs-12 col-sm-6 col-md-4">
          <p>
            <a href="/">Libraries.io</a> helps you find new open source packages, modules and frameworks and keep track of ones you depend upon.
          </p>
          <p>
            <a href="https://github.com/librariesio" title='GitHub'><i class="fa fa-github"></i></a>&nbsp;
            <a href="mailto:support@libraries.io" title='Email'><i class="fa fa-envelope"></i></a>&nbsp;
            <a href="https://github.com/librariesio/libraries.io/issues/new" title='Support'><i class="fa fa-question-circle"></i></a>&nbsp;
          </p>
          <hr>
          <p>
            <a href="https://tidelift.com/?utm_source=librariesio&amp;utm_medium=referral&amp;utm_campaign=footer"><img width="160" src="/assets/Tidelift_project_logo_white-eb4d67a698f88ef27b665cf10d43d989c960f6c0c40aa929a440f02864d177b8.svg" /></a>
          </p>
          <p class='text-muted'>
            Copyright © 2024 Tidelift, Inc<br>
            Code is Open Source under <a href="https://github.com/librariesio/libraries.io/blob/main/LICENSE.txt">AGPLv3</a> license<br>
            Data is available under <a href="https://libraries.io/data">CC-BY-SA 4.0</a> license
          </p>
      </div>
      <div class="col-xs-12 col-md-2">
        <strong><a href="/explore">Explore</a></strong>
        <ul>
          <li><a href="/platforms">Platforms</a></li>
          <li><a href="/languages">Languages</a></li>
          <li><a href="/licenses">Licenses</a></li>
        </ul>
      </div>
      <div class="col-xs-12 col-md-2">
        <ul class='list-unstyled'>
          <li><strong><a href="/about">About</a></strong></li>
          <li><strong><a href="/team">Team</a></strong></li>
          <li><strong><a href="/terms">Terms and Conditions</a></strong></li>
          <li><strong><a href="/privacy">Privacy Policy</a></strong></li>
          <li><strong><a href="/api">API</a></strong></li>
        </ul>
      </div>
    </div>
  </div>
</footer>


  <script>
    if (typeof ga === 'function') { ga('send', 'pageview', location.pathname+location.search) }
  </script>
</body>
</html>


# リンク整形

In [7]:
import json
import os

def git_url(json_file):
    url_list = []
    # JSONファイルを読み込む
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for item in data:
        # 各行からURLを取得
        url_list.append(item)

    return url_list

def create_npm_url(json_file):
    url_list = []
    # JSONファイルを読み込む
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for item in data:
        if(item != ''):
            # 各行からnpmのURLを作成
            url = "https://www.npmjs.com/package/" + item
            url_list.append(url)
        else:
            url_list.append(item)

    return url_list

# ファイルパスの設定
file_path = 'jsProject_path.json'

# 結果を格納するリスト
result = []

git_list = []
npm_list = []

 # JSONファイルのパスを指定
github_file = './top100_javascript_repositories.json' 
npm_file = './jsTopRep_npmURL.json'

git_list = git_url(github_file)
npm_list = create_npm_url(npm_file)


for (g, n) in zip(git_list, npm_list): 
    # JSONデータを作成
    result.append({
        "github": g,
        "npm": n,
        "libraries.io": ""
    })

# JSONファイルに書き込む
with open(file_path, 'w', encoding='utf-8') as json_file:
    json.dump(result, json_file, ensure_ascii=False, indent=4)
    
print("finish!")

finish!


# キーワード取得（github npm libraries.io）

In [6]:
import requests
import time
from bs4 import BeautifulSoup
import json
import os

# githubのトピックを取得する
def get_repository_topics(repository, token):
    rep_split = repository.split('/')
    owner = rep_split[-2]
    repo = rep_split[-1]
    
    url = f"https://api.github.com/repos/{owner}/{repo}/topics"
    # ヘッダーを設定
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.mercy-preview+json",
        "X-GitHub-Api-Version": "2022-11-28"
    }

    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        topics = response.json().get("names", [])
    elif response.status_code == 404:
        print("リポジトリが見つかりません。")
    else:
        print(f"エラーが発生しました: {response.status_code} - {response.text}")
    
    return(topics)


# npmのキーワードを取得する
def npm_keywords(url):
    # キーワードを格納するリスト
    keywords = []
    
    if(url != ''):
        # HTMLコンテンツを取得
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # <li class="dib mr2">を検索
        for li in soup.select('li.dib.mr2'):
            # <a>タグのテキストを取得
            a_tag = li.find('a')
            if a_tag and a_tag.text:
                keywords.append(a_tag.text.strip())
    else:
        pass
    
    return keywords


# libraries.ioのキーワードを取得する
def libraries_keywords(url):
    # キーワードを格納するリスト
    keywords = []
    
    if(url != ''):
        # HTMLコンテンツを取得
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # <dt>Keywords</dt>の次の<dd>を取得
        dt_keywords = soup.find('dt', string=lambda text: text and 'Keywords' in text.strip())
        if dt_keywords:
            dd_keywords = dt_keywords.find_next_sibling('dd')
            if dd_keywords:
                # <a>タグのテキストを取得
                for a_tag in dd_keywords.find_all('a'):
                    keywords.append(a_tag.text.strip())
    else:
        pass

    return keywords
    
i=0

# ファイルパスの設定
file_path = 'keywords_V2.json'
# トークン
token = os.environ['GITHUB_API_KEY']

# 結果を格納するリスト
result = []

url_list = []
json_file = 'jsProject_path.json'  # JSONファイルのパスを指定

# JSONファイルを読み込む
with open(json_file, 'r', encoding='utf-8') as file:
    data = json.load(file)

for item in data:
    # プロジェクト名作成
    rep_split = item['github'].split('/')
    clone_dir = rep_split[-2] + "_" + rep_split[-1]
    
    github = []
    npm = []
    libraries = []
    
    github = get_repository_topics(item['github'], token)
    npm = npm_keywords(item['npm'])
    libraries_keywords(item['libraries.io'])
    
    # 要素を重複させずにリストを結合する
    keyword_list = list(set(github + npm + libraries))
    keyword_list.sort()
    
    # JSONデータを作成
    result.append({
        "Name": clone_dir,
        "keywords": keyword_list
    })
    
    i += 1
    print(str(i) + " " + clone_dir)
    time.sleep(3)


# JSONファイルに書き込む
with open(file_path, 'w', encoding='utf-8') as json_file:
    json.dump(result, json_file, ensure_ascii=False, indent=4)

1 facebook_react
2 trekhleb_javascript-algorithms
3 twbs_bootstrap
4 airbnb_javascript
5 vercel_next.js
6 Chalarangelo_30-seconds-of-code
7 nodejs_node
8 axios_axios
9 mrdoob_three.js
10 facebook_create-react-app
11 ryanmcdermott_clean-code-javascript
12 iptv-org_iptv
13 microsoft_Web-Dev-For-Beginners
14 sveltejs_svelte
15 jaywcjlove_awesome-mac
16 FortAwesome_Font-Awesome
17 typicode_json-server
18 anuraghazra_github-readme-stats
19 hakimel_reveal.js
20 expressjs_express
21 chartjs_Chart.js
22 webpack_webpack
23 leonardomso_33-js-concepts
24 resume_resume.github.com
25 louislam_uptime-kuma
26 atom_atom
27 lodash_lodash
28 adam-p_markdown-here
29 jquery_jquery
30 angular_angular.js
31 h5bp_html5-boilerplate
32 gatsbyjs_gatsby
33 scutan90_DeepLearning-500-questions
34 azl397985856_leetcode
35 jgraph_drawio-desktop
36 Semantic-Org_Semantic-UI
37 juliangarnier_anime
38 prettier_prettier
39 mozilla_pdf.js
40 chinese-poetry_chinese-poetry
41 moment_moment
42 gorhill_uBlock
43 marktext_mark

# 各リポジトリの説明を取得

In [3]:
import requests
import time
from bs4 import BeautifulSoup
import json
import os

# GitHubのAbout情報を取得する関数
def github_abouts(url):
    if url:
        try:
            # HTMLコンテンツを取得
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # ステータスコードがエラーの場合、例外を発生させる
            soup = BeautifulSoup(response.text, 'html.parser')

            # <div class="hide-sm hide-md">を検索
            for div in soup.select('div.hide-sm.hide-md'):
                # <p>タグのテキストを取得
                p_tag = div.find('p')
                if p_tag and p_tag.text:
                    description = p_tag.text.strip()
        except requests.RequestException as e:
            print(f"Error fetching URL {url}: {e}")
    return description

# ファイルパスの設定
file_path = 'repository_V4.json'
json_file = 'jsProject_path.json'  # プロジェクトリストのJSONファイル


# 結果を格納するリスト
result = []
i=0

# JSONファイルを読み込む
with open(json_file, 'r', encoding='utf-8') as file:
    data = json.load(file)

# keywords_V2.jsonの読み込み
with open('keywords_V2.json', 'r', encoding='utf-8') as file:
    keywords_data = json.load(file)

    
for item in data:
    # プロジェクト名作成
    rep_split = item['github'].split('/')
    clone_dir = rep_split[-2] + "_" + rep_split[-1]
    
    # About情報を取得
    about = github_abouts(item['github'])
    
    for key in keywords_data:
        if key['Name'] == clone_dir:
                result.append({
                    'Name':clone_dir,
                    'keywords':key['keywords'],
                    'About':about,
                    'efficiency':0
                })
    
                i += 1
                print(str(i) + " " + clone_dir)
                print(key['keywords'])
                print(about)
                print()
    
    # APIリクエスト制限を避けるために待機
    time.sleep(2)

# JSONファイルに書き込む
with open(file_path, 'w', encoding='utf-8') as json_file:
    json.dump(result, json_file, ensure_ascii=False, indent=4)

1 facebook_react
['declarative', 'frontend', 'javascript', 'library', 'react', 'ui']
The library for web and native user interfaces.

2 trekhleb_javascript-algorithms
['algorithm', 'algorithms', 'computer-science', 'cs', 'data-structures', 'graph', 'interview', 'interview-preparation', 'javascript', 'javascript-algorithms', 'sorting-algorithms', 'tree']
📝 Algorithms and data structures implemented in JavaScript with explanations and links to further readings

3 twbs_bootstrap
['bootstrap', 'css', 'css-framework', 'framework', 'front-end', 'html', 'javascript', 'mobile-first', 'responsive', 'sass', 'scss', 'web']
The most popular HTML, CSS, and JavaScript framework for developing responsive, mobile first projects on the web.

4 airbnb_javascript
['airbnb', 'arrow-functions', 'es2015', 'es2016', 'es2017', 'es2018', 'es6', 'eslint', 'javascript', 'jsx', 'lint', 'linting', 'naming-conventions', 'react', 'style guide', 'style-guide', 'style-linter', 'styleguide', 'tc39']
JavaScript Style Gu

42 gorhill_uBlock
['adblock', 'blocker', 'browser-extension', 'chromium', 'firefox', 'javascript', 'trie', 'uBO', 'uBlock', 'ublock', 'ublock-origin']
uBlock Origin - An efficient blocker for Chromium and Firefox. Fast and lean.

43 marktext_marktext
['dark-mode', 'editor', 'electron', 'element-ui', 'emoji', 'focus-mode', 'latex', 'linux', 'mac', 'macos', 'markdown', 'marktext', 'next-generation', 'source-code', 'typewriter-mode', 'vue', 'windows']
📝A simple and elegant markdown editor, available for Linux, macOS and Windows.

44 TryGhost_Ghost
['blogging', 'cms', 'creator-economy', 'ghost', 'hacktoberfest', 'headless-cms', 'jamstack', 'javascript', 'journalism', 'nodejs', 'publishing', 'web-application']
Independent technology for modern publishing, memberships, subscriptions and newsletters.

45 NARKOZ_hacker-scripts
[]
Based on a true story

46 cypress-io_cypress
['angular-testing-library', 'automation', 'browser', 'component', 'component-testing', 'cypress', 'cypress-cloud', 'cypre

73 NaiboWang_EasySpider
['batch-processing', 'batch-script', 'code-free', 'crawler', 'data-collection', 'frontend', 'gui', 'html', 'input-parameters', 'layman', 'parameters', 'robotics', 'rpa', 'scraper', 'spider', 'visual', 'visualization', 'visualprogramming', 'web', 'www']
A visual no-code/code-free web crawler/spider易采集：一个可视化浏览器自动化测试/数据采集/爬虫软件，可以无代码图形化的设计和执行爬虫任务。别名：ServiceWrapper面向Web应用的智能化服务封装系统。

74 naptha_tesseract.js
['deep-learning', 'javascript', 'ocr', 'tesseract', 'webassembly']
Pure Javascript OCR for more than 100 Languages 📖🎉🖥

75 denysdovhan_wtfjs
['book', 'guide', 'handbook', 'javascript', 'js', 'learning', 'notes', 'specification', 'wtf']
🤪 A list of funny and tricky JavaScript examples

76 alvarotrigo_fullPage.js
['fullpage', 'fullpagejs', 'fullscreen', 'javacript', 'javascript', 'jquery', 'mouse', 'mousewheel', 'one_page', 'onepage', 'scroll', 'scrolling', 'sections', 'single_page', 'slide', 'slides', 'slideshow', 'sliding', 'snap', 'swipe', 'wheel']
fullPage plugin

# 重複するキーワードだけ集めてみる

In [15]:
import json
from collections import Counter

# JSONファイルの読み込み
with open('keywords.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# 全てのkeywordsを収集
all_keywords = []
for item in data:
    all_keywords.extend(item.get('keywords', []))

# 重複するkeywordsをカウント
keyword_counts = Counter(all_keywords)

# 2つ以上重複するkeywordsをフィルタリング
duplicate_keywords = {keyword for keyword, count in keyword_counts.items() if count > 1}

# 重複するkeywordsを持つ元のデータをフィルタリング（repositoryも含む）
filtered_data = []
for item in data:
    repository = item.get('Name')  # repositoryの取得
    filtered_keywords = [keyword for keyword in item.get('keywords', []) if keyword in duplicate_keywords]

    # keywordsが重複していない場合や空値の場合
    if not filtered_keywords:
        filtered_data.append({
            'repository': repository,
            'keywords': []  # 空値を保存
        })
    else:
        filtered_data.append({
            'repository': repository,
            'keywords': filtered_keywords
        })

# 結果を新しいJSONファイルに書き込む
with open('keywords_dup.json', 'w', encoding='utf-8') as file:
    json.dump(filtered_data, file, ensure_ascii=False, indent=4)


In [17]:
import json

json_file = 'keywords_dup.json'

# JSONファイルを読み込む
with open(json_file, 'r', encoding='utf-8') as file:
    data = json.load(file)

i = 1
for item in data:
    print(i)
    print(item)
    print()
    i += 1

1
{'repository': 'facebook_react', 'keywords': ['javascript', 'library', 'react', 'ui']}

2
{'repository': 'trekhleb_javascript-algorithms', 'keywords': ['algorithm', 'algorithms', 'computer-science', 'data-structures', 'graph', 'interview', 'javascript', 'tree']}

3
{'repository': 'twbs_bootstrap', 'keywords': ['css', 'css-framework', 'framework', 'front-end', 'html', 'javascript', 'responsive', 'scss', 'web']}

4
{'repository': 'airbnb_javascript', 'keywords': ['es2015', 'es6', 'javascript', 'jsx', 'react']}

5
{'repository': 'vercel_next.js', 'keywords': ['blog', 'browser', 'cli', 'compiler', 'components', 'framework', 'front-end', 'node', 'react', 'server', 'ssg', 'static-site-generator', 'web']}

6
{'repository': 'Chalarangelo_30-seconds-of-code', 'keywords': ['awesome-list', 'css', 'education', 'es6-javascript', 'git', 'html', 'javascript', 'nodejs', 'programming']}

7
{'repository': 'nodejs_node', 'keywords': ['javascript', 'js', 'linux', 'macos', 'node', 'nodejs', 'runtime', 'w

In [19]:
len(duplicate_keywords)

91

# jsonからxlsxへ

In [3]:
import json
import os
import pandas as pd


# プロジェクトディレクトリの一覧
RESULT_DIR = "./zenkoku_result"
result = os.listdir(RESULT_DIR)

# 移動
os.chdir('zenkoku_result')

for dir in result:
    # JSONファイルの読み込み
    with open(dir, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # データフレームの作成
    df = pd.DataFrame(data)

    # 必要な列を指定して並べ替え
    df = df[['repository', 'fast', 'slow']]

    # Excelファイルの作成
    fname = dir.split('.')[0]
    df.to_excel(f'{fname}.xlsx', index=False, engine='openpyxl')
    print(f'create {fname}.xlsx')

# 移動
os.chdir('..')


create for_forEach_TOP100.xlsx
