# wrds_links

> WRDS Concordances

In [None]:
#| default_exp wrds_links

In [None]:
#|exports
from __future__ import annotations
from typing import List, Dict, Tuple, Callable
import os

import pandas as pd
import numpy as np
import wrds 

from finsets import wrds_utils

## Merging CRSP and COMPUSTAT

Official documentation for this merge is [here](https://wrds-www.wharton.upenn.edu/pages/wrds-research/applications/linking-databases/linking-crsp-and-compustat/)

Relevant information from that site:

- Because TICKERs and CUSIPs change over time, most datasets use permanent identifiers to indicate the same securities and companies. For example, CRSP employs PERMNO to track stocks, Compustat uses GVKEY to follow companies, and a combination of GVKEY and IID is used to track securities. As the identifier table in the WRDS documentation shows, both PERMNO and GVKEY (+IID) remain the same, regardless of changes in TICKER, CUSIP, and company names.

- The CRSP/Compustat Merged Database (CCM) comprises CRSP and Compustat data, together with the link and link-history references between these two databases. The key product of CCM is a permanent identifier linking table (PERMNO to GVKEY+IID), though CCM is often mistaken for the merged product of CRSP stock market data with Compustat accounting data. From an end user’s perspective, CCM only adds a link of PERMNO and PERMCO to the Compustat database, so that Compustat items can be searched by CRSP identifiers.


In [None]:
#| export
def crspm_w_gvkey(wrds_username: str | None = None) -> pd.DataFrame:
    """CRSP Monthly ids, with gvkeys.

    Builds a single SQL query and hands it to `wrds_utils.download`. The
    query starts from the CRSP monthly stock file (`crsp.msf`) and keeps only
    rows that match both:

    - a `crsp.msenames` record for the same permno whose
      [namedt, nameendt] range contains the observation date, and
    - a `crsp.ccmxpf_lnkhist` record for the same permno with linktype in
      ('LU', 'LC') and linkprim in ('P', 'C') whose [linkdt, linkenddt]
      range contains the observation date.

    Args:
        wrds_username: Forwarded unchanged to `wrds_utils.download`.
            NOTE(review): presumably `None` lets that helper pick a default
            WRDS user -- confirm in `wrds_utils`.

    Returns:
        The result of `wrds_utils.download` for the query. The query selects
        the columns date, permno, permco, gvkey and iid (`liid` renamed).
    """
    # COALESCE(linkenddt, CURRENT_DATE): a link with no end date is matched
    # against every date up to the day the query runs.
    query = """SELECT a.date, a.permno, a.permco, c.gvkey, c.liid as iid
                  FROM crsp.msf a
                  INNER JOIN crsp.msenames b ON a.permno = b.permno
                                             AND a.date BETWEEN b.namedt AND b.nameendt 
                  INNER JOIN crsp.ccmxpf_lnkhist c ON a.permno = c.lpermno 
                                                   AND c.linktype IN ('LU','LC') AND c.linkprim IN ('P','C')
                                                   AND a.date BETWEEN c.linkdt AND COALESCE(c.linkenddt, CURRENT_DATE)
                """
    return wrds_utils.download(query, wrds_username)

In [None]:
#| eval: false
# Run the CRSP-side link query with the default wrds_username (None) and keep the result.
permno_gvkey = crspm_w_gvkey()

Loading library list...
Done


In [None]:
#| eval: false
# head(0) selects zero rows, so only the column names of the result are displayed.
permno_gvkey.head(0)

Unnamed: 0,date,permno,permco,gvkey,iid


In [None]:
#| export
def compa_w_permno(wrds_username: str | None = None) -> pd.DataFrame:
    """COMPUSTAT Fundamentals Annual with permno's. As done by CCM.

    Builds a single SQL query and hands it to `wrds_utils.download`. The
    query joins `comp.funda` to `crsp.ccmxpf_lnkhist` on gvkey and keeps rows
    where:

    - datadate falls within the link's [linkdt, linkenddt] range,
    - the link has linktype in ('LU', 'LC') and linkprim in ('P', 'C'), and
    - the Compustat record has indfmt='INDL', datafmt='STD', popsrc='D'
      and consol='C'.

    Args:
        wrds_username: Forwarded unchanged to `wrds_utils.download`.
            NOTE(review): presumably `None` lets that helper pick a default
            WRDS user -- confirm in `wrds_utils`.

    Returns:
        The result of `wrds_utils.download` for the query. The query selects
        the columns datadate, gvkey, permno, permco and iid (the link
        table's `lpermno`, `lpermco` and `liid`, renamed).
    """
    # Plain (non-f) string: the query has no interpolated values.
    # COALESCE(linkenddt, CURRENT_DATE): a link with no end date is matched
    # against every datadate up to the day the query runs.
    query = """SELECT a.datadate, a.gvkey , b.lpermno as permno, b.lpermco as permco, b.liid as iid 
                    FROM comp.funda a
                    INNER JOIN crsp.ccmxpf_lnkhist  b ON a.gvkey = b.gvkey
                    WHERE datadate BETWEEN b.linkdt AND COALESCE(b.linkenddt, CURRENT_DATE)
                            AND b.linktype IN ('LU','LC') AND b.linkprim IN ('P','C')
                            AND indfmt='INDL' AND datafmt='STD' AND popsrc='D' AND consol='C'"""

    return wrds_utils.download(query, wrds_username)

In [None]:
#| eval: false
# Run the Compustat-side link query with the default wrds_username (None) and keep the result.
gvkey_permno = compa_w_permno()

Loading library list...
Done


In [None]:
#| eval: false
# head(0) selects zero rows, so only the column names of the result are displayed.
gvkey_permno.head(0)

Unnamed: 0,datadate,gvkey,permno,permco,iid


In [None]:
#| hide
# Run nbdev's export step from inside the notebook.
import nbdev

nbdev.nbdev_export()